/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "run_tflite.h"

#include <android/log.h>
#include <dirent.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <ftw.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>

#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"

#include "tensorflow/lite/kernels/register.h"

#define LOG_TAG "NN_BENCHMARK"

#define FATAL(fmt, ...)                                                  \
  do {                                                                   \
    __android_log_print(ANDROID_LOG_FATAL, LOG_TAG, fmt, ##__VA_ARGS__); \
    assert(false);                                                       \
  } while (0)

namespace {

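// Returns the current wall-clock time in microseconds; used to time individual
// inferences and compilations below.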
long long currentTimeInUsec() {
  timeval tv;
  gettimeofday(&tv, NULL);
  return (tv.tv_sec * 1000000LL) + tv.tv_usec;
}

// Workaround for build systems that make it difficult to pick the correct NDK
// API level. NDK tracing methods are dynamically loaded from libandroid.so.
typedef void* (*fp_ATrace_beginSection)(const char* sectionName);
typedef void* (*fp_ATrace_endSection)();
struct TraceFunc {
  fp_ATrace_beginSection ATrace_beginSection;
  fp_ATrace_endSection ATrace_endSection;
};
TraceFunc setupTraceFunc() {
  void* lib = dlopen("libandroid.so", RTLD_NOW | RTLD_LOCAL);
  if (lib == nullptr) {
    FATAL("unable to open libandroid.so");
  }
  return {
      reinterpret_cast<fp_ATrace_beginSection>(
          dlsym(lib, "ATrace_beginSection")),
      reinterpret_cast<fp_ATrace_endSection>(dlsym(lib, "ATrace_endSection"))};
}
static TraceFunc kTraceFunc{setupTraceFunc()};

}  // namespace

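// Factory method: allocates a BenchmarkModel and runs init(); returns nullptr
// (after logging the failure) if initialization does not succeed.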
BenchmarkModel* BenchmarkModel::create(const char* modelfile, int tfliteBackend,
                                       bool enable_intermediate_tensors_dump, int* nnapiErrno,
                                       const char* nnapi_device_name, bool mmapModel,
                                       const char* nnapi_cache_dir) {
  BenchmarkModel* model = new BenchmarkModel();
  if (!model->init(modelfile, tfliteBackend, enable_intermediate_tensors_dump, nnapiErrno,
                   nnapi_device_name, mmapModel, nnapi_cache_dir)) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to init model %s", modelfile);
    delete model;
    return nullptr;
  }
  return model;
}

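// Loads the FlatBuffer model (memory-mapped or read into a buffer), builds the
// TFLite interpreter, optionally exposes every intermediate tensor as a model
// output, and attaches the requested delegate.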
bool BenchmarkModel::init(const char* modelfile, int tfliteBackend,
                          bool enable_intermediate_tensors_dump, int* nnapiErrno,
                          const char* nnapi_device_name, bool mmapModel,
                          const char* nnapi_cache_dir) {
  __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "BenchmarkModel %s",
                      modelfile);
  mModelFile = modelfile;
  if (nnapi_cache_dir) {
    mCacheDir = nnapi_cache_dir;
  }
  if (nnapi_device_name) {
    mNnApiDeviceName = nnapi_device_name;
  }

  if (mmapModel) {
    // Memory-map the model. NOTE: the mapping must remain valid for at least
    // as long as the interpreter.
    mTfliteModel = tflite::FlatBufferModel::BuildFromFile(modelfile);
  } else {
    std::ifstream t(modelfile);
    mModelBuffer = std::string((std::istreambuf_iterator<char>(t)),
                               std::istreambuf_iterator<char>());
    mTfliteModel = tflite::FlatBufferModel::BuildFromBuffer(mModelBuffer.c_str(),
                                                            mModelBuffer.size());
  }
  if (!mTfliteModel) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to load model %s",
                        modelfile);
    return false;
  }

  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&mTfliteInterpreter);
  if (!mTfliteInterpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to create TFlite interpreter");
    return false;
  }

  if (enable_intermediate_tensors_dump) {
    // Make the output of every op a model output. This way we will be able to
    // fetch each intermediate tensor when running with delegates.
    outputs.clear();
    for (size_t node = 0; node < mTfliteInterpreter->nodes_size(); ++node) {
      auto node_outputs =
          mTfliteInterpreter->node_and_registration(node)->first.outputs;
      outputs.insert(outputs.end(), node_outputs->data,
                     node_outputs->data + node_outputs->size);
    }
    mTfliteInterpreter->SetOutputs(outputs);
  }

  // Allow Fp16 precision for all models.
  mTfliteInterpreter->SetAllowFp16PrecisionForFp32(true);

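  // Attach the requested delegate; the default case keeps the plain CPU
  // interpreter with the builtin kernels.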
  mTfliteBackend = tfliteBackend;
  switch (mTfliteBackend) {
    case TFLITE_NNAPI: {
      tflite::StatefulNnApiDelegate::Options nnapi_options;
      nnapi_options.accelerator_name = nnapi_device_name;
      mTfliteNnapiDelegate = std::make_unique<tflite::StatefulNnApiDelegate>(nnapi_options);
      int delegationStatus = mTfliteInterpreter->ModifyGraphWithDelegate(mTfliteNnapiDelegate.get());
      *nnapiErrno = mTfliteNnapiDelegate->GetNnApiErrno();
      if (delegationStatus != kTfLiteOk ||
          *nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(
            ANDROID_LOG_ERROR, LOG_TAG,
            "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
            modelfile, *nnapiErrno);
        return false;
      }
    } break;
    case TFLITE_GPU: {
#if defined(NN_BENCHMARK_ENABLE_GPU)
      mGpuDelegate = TfLiteGpuDelegateV2Create(/*default options=*/nullptr);
      if (mTfliteInterpreter->ModifyGraphWithDelegate(mGpuDelegate) !=
          kTfLiteOk) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "Failed to initialize GPU Delegate");
        return false;
      }
#else   // !defined(NN_BENCHMARK_ENABLE_GPU)
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "GPU delegate requested but not enabled with "
                          "NN_BENCHMARK_ENABLE_GPU");
      return false;
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
    } break;
    default:
      break;
  }
  return true;
}

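// The NNAPI delegate is owned by a unique_ptr and cleans itself up; only the
// GPU delegate, created through the C API, needs an explicit delete here.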
BenchmarkModel::~BenchmarkModel() {
  switch (mTfliteBackend) {
    case TFLITE_GPU: {
#if defined(NN_BENCHMARK_ENABLE_GPU)
      TfLiteGpuDelegateV2Delete(mGpuDelegate);
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
    } break;
    default:
      break;
  }
}

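// Copies raw input data into the model's single input tensor. Only float32 and
// uint8 input tensors are supported.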
bool BenchmarkModel::setInput(const uint8_t* dataPtr, size_t length) {
  int input = mTfliteInterpreter->inputs()[0];
  auto* input_tensor = mTfliteInterpreter->tensor(input);

  switch (input_tensor->type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8: {
      void* raw = input_tensor->data.raw;
      memcpy(raw, dataPtr, length);
      break;
    }
    default:
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Input tensor type not supported");
      return false;
  }
  return true;
}
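
// Appends the raw bytes of the given output tensor to the InferenceResult so
// callers can inspect or dump the produced outputs later.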
void BenchmarkModel::saveInferenceOutput(InferenceResult* result,
                                         int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  auto& sink = result->inferenceOutputs[output_index];
  sink.insert(sink.end(), output_tensor->data.uint8,
              output_tensor->data.uint8 + output_tensor->bytes);
}

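// Compares one output tensor against golden data and records the mean squared
// error and the maximum (signed) single error for that output.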
void BenchmarkModel::getOutputError(const uint8_t* expected_data, size_t length,
                                    InferenceResult* result, int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  if (output_tensor->bytes != length) {
    FATAL("Wrong size of output tensor, expected %zu, is %zu",
          output_tensor->bytes, length);
  }

  size_t elements_count = 0;
  float err_sum = 0.0;
  float max_error = 0.0;
  switch (output_tensor->type) {
    case kTfLiteUInt8: {
      uint8_t* output_raw = mTfliteInterpreter->typed_tensor<uint8_t>(output);
      elements_count = output_tensor->bytes;
      for (size_t i = 0; i < output_tensor->bytes; ++i) {
        float err = ((float)output_raw[i]) - ((float)expected_data[i]);
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    case kTfLiteFloat32: {
      const float* expected = reinterpret_cast<const float*>(expected_data);
      float* output_raw = mTfliteInterpreter->typed_tensor<float>(output);
      elements_count = output_tensor->bytes / sizeof(float);
      for (size_t i = 0; i < output_tensor->bytes / sizeof(float); ++i) {
        float err = output_raw[i] - expected[i];
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    default:
      FATAL("Output tensor type %d not supported", output_tensor->type);
  }
  result->meanSquareErrors[output_index] = err_sum / elements_count;
  result->maxSingleErrors[output_index] = max_error;
}

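// Resizes the single model input tensor to the given shape and reallocates all
// tensors.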
bool BenchmarkModel::resizeInputTensors(std::vector<int> shape) {
  // The benchmark only expects a single input tensor, hardcoded as index 0.
  int input = mTfliteInterpreter->inputs()[0];
  mTfliteInterpreter->ResizeInputTensor(input, shape);
  if (mTfliteInterpreter->AllocateTensors() != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to allocate tensors!");
    return false;
  }
  return true;
}

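// Runs a single inference and checks both the TFLite status and, when the
// NNAPI delegate is active, the NNAPI error code.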
bool BenchmarkModel::runInference() {
  auto status = mTfliteInterpreter->Invoke();
  auto nnapi_errno = mTfliteNnapiDelegate
                         ? mTfliteNnapiDelegate->GetNnApiErrno()
                         : ANEURALNETWORKS_NO_ERROR;
  if (status != kTfLiteOk || nnapi_errno != ANEURALNETWORKS_NO_ERROR) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to invoke, tflite status: %d, nnapi errno: %d!",
                        (int)status, nnapi_errno);
    return false;
  }
  return true;
}

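// Resets variable (state) tensors so that each benchmarked sequence starts
// from a clean state.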
bool BenchmarkModel::resetStates() {
  auto status = mTfliteInterpreter->ResetVariableTensors();
  if (status != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to reset variable tensors: %d!", (int)status);
    return false;
  }
  return true;
}

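// Runs up to seqInferencesMaxCount input/output sequences (or until the time
// budget is exhausted), timing each inference. Depending on the flags, golden
// outputs are checked, inference outputs are saved, and results may be sampled
// at INFERENCE_OUT_SAMPLE_RATE instead of recorded for every sequence.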
bool BenchmarkModel::benchmark(
    const std::vector<InferenceInOutSequence>& inOutData,
    int seqInferencesMaxCount, float timeout, int flags,
    std::vector<InferenceResult>* results) {
  if (inOutData.empty()) {
    __android_log_print(ANDROID_LOG_WARN, LOG_TAG,
                        "Input/output vector is empty");
    return true;
  }

  float inferenceTotal = 0.0;
  for (int seqInferenceIndex = 0; seqInferenceIndex < seqInferencesMaxCount;
       ++seqInferenceIndex) {
    resetStates();

    const int inputOutputSequenceIndex = seqInferenceIndex % inOutData.size();
    const InferenceInOutSequence& seq = inOutData[inputOutputSequenceIndex];
    const bool sampleResults = (flags & FLAG_SAMPLE_BENCHMARK_RESULTS) != 0;
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];

      // For NNAPI systrace usage documentation, see
      // frameworks/ml/nn/common/include/Tracing.h.
      kTraceFunc.ATrace_beginSection("[NN_LA_PE]BenchmarkModel::benchmark");
      kTraceFunc.ATrace_beginSection("[NN_LA_PIO]BenchmarkModel::input");
      if (data.input) {
        setInput(data.input, data.input_size);
      } else {
        int input = mTfliteInterpreter->inputs()[0];
        auto* input_tensor = mTfliteInterpreter->tensor(input);
        if (!data.createInput((uint8_t*)input_tensor->data.raw,
                              input_tensor->bytes)) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Input creation %d failed", i);
          return false;
        }
      }
      kTraceFunc.ATrace_endSection();
      long long startTime = currentTimeInUsec();
      const bool success = runInference();
      kTraceFunc.ATrace_endSection();
      long long endTime = currentTimeInUsec();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      float inferenceTime =
          static_cast<float>(endTime - startTime) / 1000000.0f;
      size_t outputsCount = mTfliteInterpreter->outputs().size();
      InferenceResult result{
          inferenceTime, {}, {}, {}, inputOutputSequenceIndex, i};
      result.meanSquareErrors.resize(outputsCount);
      result.maxSingleErrors.resize(outputsCount);
      result.inferenceOutputs.resize(outputsCount);

      if ((flags & FLAG_IGNORE_GOLDEN_OUTPUT) == 0) {
        if (outputsCount != data.outputs.size()) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Golden/actual outputs (%zu/%zu) count mismatch",
                              data.outputs.size(), outputsCount);
          return false;
        }
        for (int j = 0; j < outputsCount; ++j) {
          getOutputError(data.outputs[j].ptr, data.outputs[j].size, &result, j);
        }
      }

      if ((flags & FLAG_DISCARD_INFERENCE_OUTPUT) == 0) {
        for (int j = 0; j < outputsCount; ++j) {
          saveInferenceOutput(&result, j);
        }
      }

      if (!sampleResults || (seqInferenceIndex % INFERENCE_OUT_SAMPLE_RATE) == 0) {
        results->push_back(result);
      }
      inferenceTotal += inferenceTime;
    }

    // Timeout?
    if (timeout > 0.001 && inferenceTotal > timeout) {
      return true;
    }
  }
  return true;
}

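// Compiles the model once by building a throwaway interpreter and applying the
// NNAPI delegate to it; used by the compilation benchmarks below.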
// If cacheDir is not nullptr, compilation caching will be used with NNAPI.
bool BenchmarkModel::runCompilation(const char* cacheDir) {
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&interpreter);
  if (!interpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to create TFlite interpreter");
    return false;
  }

  // Allow Fp16 precision for all models.
  interpreter->SetAllowFp16PrecisionForFp32(true);

  if (mTfliteBackend == TFLITE_NNAPI) {
    tflite::StatefulNnApiDelegate::Options nnapi_options;
    nnapi_options.accelerator_name = mNnApiDeviceName.empty() ? nullptr : mNnApiDeviceName.c_str();
    if (cacheDir) {
      nnapi_options.cache_dir = cacheDir;
      nnapi_options.model_token = mModelFile.c_str();
    }
    tflite::StatefulNnApiDelegate delegate(nnapi_options);
    int delegationStatus = interpreter->ModifyGraphWithDelegate(&delegate);
    auto nnapiErrno = delegate.GetNnApiErrno();
    if (delegationStatus != kTfLiteOk || nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
                          mModelFile.c_str(), nnapiErrno);
      return false;
    }
  }
  return true;
}

// A helper class to manage the lifetime of a temporary cache directory.
class ScopedTempDirectory {
 public:
  ScopedTempDirectory(std::string base) : mBase(std::move(base)) {}
  ~ScopedTempDirectory() { cleanup(); }

  // Create a new temp directory, removing the old one if needed.
  void recreate() {
    cleanup();
    mTempDir = mBase + "/XXXXXX";
    mkdtemp(&mTempDir[0]);
  }

  // Get the path to the temp directory.
  const char* get() const { return mTempDir.empty() ? nullptr : mTempDir.c_str(); }

 private:
  void cleanup() {
    if (mTempDir.empty()) {
      return;
    }
    auto callback = [](const char* entry, const struct stat*, int, struct FTW*) {
      return remove(entry);
    };
    nftw(mTempDir.c_str(), callback, 128, FTW_DEPTH | FTW_MOUNT | FTW_PHYS);
    mTempDir.clear();
  }

  std::string mBase;
  std::string mTempDir;
};

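// Measures the on-disk size of the NNAPI compilation cache by compiling into a
// temporary directory and summing the sizes of the regular files created there.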
bool BenchmarkModel::getCompilationCacheSize(int* cacheSizeBytes) {
  if (cacheSizeBytes == nullptr) return false;

  // Create cache files.
  ScopedTempDirectory tempDir(mCacheDir.value());
  tempDir.recreate();
  const bool success = runCompilation(tempDir.get());
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
    return false;
  }

  // Compute the total size of the cache files.
  int totalSize = 0;
  DIR* dir = opendir(tempDir.get());
  if (dir == nullptr) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open cache directory");
    return false;
  }
  struct dirent* dp = nullptr;
  while ((dp = readdir(dir)) != nullptr) {
    char fullPath[1024];
    snprintf(fullPath, 1024, "%s/%s", tempDir.get(), dp->d_name);
    struct stat st;
    int err = stat(fullPath, &st);
    if (err != 0) {
      closedir(dir);
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to stat %s", fullPath);
      return false;
    }
    // Only accumulate the sizes of regular files. This excludes '.' and '..'.
    if (S_ISREG(st.st_mode)) {
      totalSize += st.st_size;
    }
  }
  closedir(dir);
  *cacheSizeBytes = totalSize;
  return true;
}

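// Benchmarks one compilation scenario (no cache, save-to-cache, or
// prepare-from-cache), running up to maxNumIterations compilations or until
// the time budget runs out, and optionally collecting per-iteration times.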
bool BenchmarkModel::benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                                      int maxNumIterations, float timeout,
                                                      std::vector<float>* results) {
  if (results != nullptr) {
    results->clear();
  }
  ScopedTempDirectory tempDir(mCacheDir.value());

  // Initialize cache files to benchmark a cache hit.
  if (type == CompilationBenchmarkType::PREPARE_FROM_CACHE) {
    tempDir.recreate();
    const bool success = runCompilation(tempDir.get());
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
      return false;
    }
  }

  float compilationTotal = 0.0;
  for (int i = 0; i < maxNumIterations; i++) {
    const char* cacheDir = nullptr;
    switch (type) {
      case CompilationBenchmarkType::WITHOUT_CACHE:
        cacheDir = nullptr;
        break;
      case CompilationBenchmarkType::SAVE_TO_CACHE:
        // Remove the cache files from the last iteration to benchmark a cache miss.
        tempDir.recreate();
        [[fallthrough]];
      case CompilationBenchmarkType::PREPARE_FROM_CACHE:
        cacheDir = tempDir.get();
        break;
      default:
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Unknown CompilationBenchmarkType: %d",
                            static_cast<int>(type));
        return false;
    }

    kTraceFunc.ATrace_beginSection("[NN_LA_PC]BenchmarkModel::benchmarkCompilation");
    const long long startTime = currentTimeInUsec();
    const bool success = runCompilation(cacheDir);
    const long long endTime = currentTimeInUsec();
    kTraceFunc.ATrace_endSection();
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Compilation %d failed", i);
      return false;
    }

    const float compilationTime = static_cast<float>(endTime - startTime) / 1000000.0f;
    if (results != nullptr) {
      results->push_back(compilationTime);
    }

    // Timeout?
    compilationTotal += compilationTime;
    if (timeout > 0.001 && compilationTotal > timeout) {
      return true;
    }
  }
  return true;
}

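// Runs a warm-up pass (results discarded) followed by the measured pass for
// the given compilation scenario.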
bool BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                                int maxNumIterations,
                                                                float warmupTimeout,
                                                                float runTimeout,
                                                                std::vector<float>* results) {
  kTraceFunc.ATrace_beginSection(
      "[NN_LA_PWM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  bool success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, warmupTimeout, nullptr);
  kTraceFunc.ATrace_endSection();
  if (!success) return false;

  kTraceFunc.ATrace_beginSection(
      "[NN_LA_PBM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, runTimeout, results);
  kTraceFunc.ATrace_endSection();
  return success;
}

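// Top-level compilation benchmark: measures compilation without a cache,
// determines the cache size, and, if caching is supported (cache size > 0),
// also measures save-to-cache and prepare-from-cache times.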
bool BenchmarkModel::benchmarkCompilation(int maxNumIterations, float warmupTimeout,
                                          float runTimeout, CompilationBenchmarkResult* result) {
  if (result == nullptr) return false;

  // Benchmark compilation without cache.
  bool success = benchmarkSingleTypeOfCompilationWithWarmup(
      CompilationBenchmarkType::WITHOUT_CACHE, maxNumIterations, warmupTimeout, runTimeout,
      &result->compileWithoutCacheTimeSec);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to benchmark compilation without cache");
    return false;
  }

  // Get the compilation cache size.
  success = getCompilationCacheSize(&result->cacheSizeBytes);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to retrieve compilation cache size");
    return false;
  }

  // Benchmark saving to cache and preparing from cache only if supported.
  if (result->cacheSizeBytes > 0) {
    // Benchmark saving to cache.
    auto& saveToCacheTimeSec = result->saveToCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
        CompilationBenchmarkType::SAVE_TO_CACHE, maxNumIterations, warmupTimeout, runTimeout,
        &saveToCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark saving to cache");
      return false;
    }

    // Benchmark preparing from cache.
    auto& prepareFromCacheTimeSec = result->prepareFromCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
        CompilationBenchmarkType::PREPARE_FROM_CACHE, maxNumIterations, warmupTimeout,
        runTimeout, &prepareFromCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark preparing from cache");
      return false;
    }
  }
  return true;
}

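// Runs each input/output sequence once and writes every intermediate tensor
// (exposed as a model output by init() when intermediate dumping is enabled)
// to a separate file under the given path.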
bool BenchmarkModel::dumpAllLayers(
    const char* path, const std::vector<InferenceInOutSequence>& inOutData) {
  if (inOutData.empty()) {
    FATAL("Input/output vector is empty");
  }

  for (int seqInferenceIndex = 0; seqInferenceIndex < inOutData.size();
       ++seqInferenceIndex) {
    resetStates();

    const InferenceInOutSequence& seq = inOutData[seqInferenceIndex];
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];
      setInput(data.input, data.input_size);
      const bool success = runInference();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      // The tensors in 'outputs' are not sorted by tensor index.
      for (int tensor_order = 0; tensor_order < outputs.size(); ++tensor_order) {
        int tensor_index = outputs[tensor_order];
        auto* output_tensor = mTfliteInterpreter->tensor(tensor_index);
        if (output_tensor->data.raw == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "output_tensor->data.raw == nullptr at index %d ", tensor_index);
          continue;
        }
        char fullpath[1024];
        snprintf(fullpath, 1024, "%s/dump_%.3d_seq_%.3d_order_%.3d_tensor_%.3d", path,
                 seqInferenceIndex, i, tensor_order, tensor_index);
        FILE* f = fopen(fullpath, "wb");
        if (f == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open %s for writing",
                              fullpath);
          return false;
        }
        fwrite(output_tensor->data.raw, output_tensor->bytes, 1, f);
        fclose(f);
      }
    }
  }
  return true;
}