1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include <cstdarg>
16 #include <cstring>
17 #include <iostream>
18 #include <memory>
19 #include <vector>
20
21 #include "tensorflow/lite/allocation.h"
22 #include "tensorflow/lite/builtin_op_data.h"
23 #include "tensorflow/lite/builtin_ops.h"
24 #include "tensorflow/lite/c/c_api_internal.h"
25 #include "tensorflow/lite/context_util.h"
26 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
27 #include "tensorflow/lite/kernels/kernel_util.h"
28 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
29
30 #ifdef __ANDROID__
31 #include <sys/system_properties.h>
32 #endif
33 #if defined __ANDROID__ || defined __unix__
34 #include <sys/mman.h>
35 #include <unistd.h>
36 #endif
37
38 namespace tflite {
39 namespace {
40
41 // TODO(b/80621585): Consider printing error string, but don't for now to
42 // minimize binary size.
43 #define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code) \
44 do { \
45 const auto _code = (code); \
46 if (_code != ANEURALNETWORKS_NO_ERROR) { \
47 context->ReportError(context, "NN API returned error (%d, line %d).\n", \
48 _code, __LINE__); \
49 return kTfLiteError; \
50 } \
51 } while (0)
52
53 namespace {
54
55 bool IsFloat(TfLiteType type) {
56 switch (type) {
57 case kTfLiteFloat32:
58 return true;
59 default:
60 return false;
61 }
62 }
63
64 bool IsQuantized(TfLiteType type) {
65 switch (type) {
66 case kTfLiteUInt8:
67 case kTfLiteInt8:
68 case kTfLiteInt16:
69 return true;
70 default:
71 return false;
72 }
73 }
74
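// A "hybrid" operator takes floating-point inputs but quantized weights
// (e.g. float activations with uint8/int8 filters). NNAPI only supports
// such operators from 1.2 onwards, so callers use this check to reject
// them on older Android versions.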
75 bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
76 const TfLiteNode* node) {
77 switch (builtin_code) {
78 case kTfLiteBuiltinConv2d:
79 case kTfLiteBuiltinFullyConnected: {
80 const int input_id = node->inputs->data[0];
81 const int filter_id = node->inputs->data[1];
82 const TfLiteType input_type = context->tensors[input_id].type;
83 const TfLiteType filter_type = context->tensors[filter_id].type;
84 return IsFloat(input_type) && IsQuantized(filter_type);
85 }
86 default:
87 return false;
88 }
89 }
90
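// Android API levels that correspond to NNAPI 1.0 (Android 8.1), 1.1
// (Android 9) and 1.2 (Android 10), respectively.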
91 constexpr int32_t kMinSdkVersionForNNAPI = 27;
92 constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
93 constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
94 constexpr size_t kDefaultByteAlignmentForNNAPI = 16;
95
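// Returns the number of padding bytes needed to round `byte_size` up to the
// next multiple of kDefaultByteAlignmentForNNAPI. For example, a 10-byte
// tensor needs 6 padding bytes so that the next tensor in the shared memory
// pool starts at a 16-byte boundary; a 32-byte tensor needs none.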
96 static size_t getNumPaddingBytes(size_t byte_size) {
97 size_t num_padding_bytes = 0;
98 if (byte_size % kDefaultByteAlignmentForNNAPI) {
99 num_padding_bytes = kDefaultByteAlignmentForNNAPI -
100 (byte_size % kDefaultByteAlignmentForNNAPI);
101 }
102 return num_padding_bytes;
103 }
104 } // namespace
105
106 // RAII NN API Model Destructor for use with std::unique_ptr
107 struct NNFreeModel {
108 void operator()(ANeuralNetworksModel* model) {
109 NnApiImplementation()->ANeuralNetworksModel_free(model);
110 }
111 };
112 // RAII NN API Compilation Destructor for use with std::unique_ptr
113 struct NNFreeCompilation {
114 void operator()(ANeuralNetworksCompilation* model) {
115 NnApiImplementation()->ANeuralNetworksCompilation_free(model);
116 }
117 };
118
119 // RAII NN API Execution Destructor for use with std::unique_ptr
120 struct NNFreeExecution {
121 void operator()(ANeuralNetworksExecution* execution) {
122 NnApiImplementation()->ANeuralNetworksExecution_free(execution);
123 }
124 };
125
126 // Manage NNAPI shared memory handle
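// The same file descriptor is mmap()ed for CPU access (get_data_ptr()) and
// wrapped in an ANeuralNetworksMemory (get_handle()), so data written through
// the CPU pointer is visible to the NNAPI runtime without extra copies.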
127 class NNMemory {
128 public:
129 #if defined __ANDROID__ || defined __unix__
130 NNMemory(const NnApi* nnapi, const char* name, size_t size) {
131 nnapi_ = nnapi;
132 byte_size_ = size;
133 fd_ = nnapi_->ASharedMemory_create(name, size);
134 data_ptr_ = reinterpret_cast<uint8_t*>(
135 mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
136 nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
137 fd_, 0, &nn_memory_handle_);
138 }
139 #else
140 NNMemory(const NnApi* /*nnapi*/, const char* /*name*/, size_t /*size*/) {}
141 #endif
142
143 ~NNMemory() {
144 #if defined __ANDROID__ || defined __unix__
145 if (data_ptr_) {
146 munmap(data_ptr_, byte_size_);
147 }
148 if (nn_memory_handle_) {
149 nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
150 }
151 if (fd_ > 0) close(fd_);
152 #endif
153 }
154
155 ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
156 uint8_t* get_data_ptr() { return data_ptr_; }
157
158 private:
159 #if defined __ANDROID__ || defined __unix__
160 const NnApi* nnapi_;
161 int fd_ = 0;
162 size_t byte_size_ = 0;
163 #endif
164 uint8_t* data_ptr_ = nullptr;
165 ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
166 };
167
168 // Tracks the mapping from TFLite tensor indices to NN API tensor indices.
169 class OperandMapping {
170 public:
171 // Given a TFLite index, return the corresponding ANN index. If it doesn't
172 // exist, return -1.
173 int lite_index_to_ann(int index) const {
174 if (index < lite_tensor_to_ann_tensor_.size())
175 return lite_tensor_to_ann_tensor_[index];
176 else
177 return -1;
178 }
179
180 // NN API uses non-tensor operands instead of structs. This creates one
181 // and returns the index. It uses a std::vector and resizes it as needed,
182 // keeping -1 for unmapped values. Intermediate tensors likely will not
183 // be mapped.
184 int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }
185
186 // Add a new mapping from `tflite_index` and return the NN API tensor index.
187 int add_new_ann_tensor_index(int tflite_index) {
188 if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
189 lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
190 }
191 int new_tensor_index = next_ann_tensor_index_++;
192 lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
193 return new_tensor_index;
194 }
195
196 private:
197 // Next index of ann tensor
198 int next_ann_tensor_index_ = 0;
199
200 // Mapping from lite index. Use a std::vector for speed and code size
201 // rather than a map.
202 std::vector<int> lite_tensor_to_ann_tensor_;
203 };
204
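// Tracks which quantized ANN tensors already have a dequantized (float)
// counterpart, so that at most one DEQUANTIZE operation is emitted per
// (tensor, float type) pair.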
205 class DequantizeMapping {
206 public:
207 int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
208 for (const auto& element : mapping_) {
209 if (ann_index == std::get<0>(element) && type == std::get<1>(element)) {
210 return std::get<2>(element);
211 }
212 }
213 return -1;
214 }
215
216 void Add(int ann_index, TfLiteType type, int dequantized_ann_index) {
217 // This assumes it is not already mapped.
218 mapping_.emplace_back(ann_index, type, dequantized_ann_index);
219 }
220
221 private:
222 // Each tuple specifies the ANN (quantized) tensor index, the desired
223 // floating-point type and the matching ANN (dequantized) tensor index. This
224 // could use a map but instead std::vector is used to keep code size lower.
225 std::vector<std::tuple<int, TfLiteType, int>> mapping_;
226 };
227
228 // Abstract builder for building an op in the NN API graph. This handles
229 // the disparity between TFLite and NN API operand types. NN API has singular
230 // operands for both tensors and parameters, and TFLite separates the two.
231 class NNAPIOpBuilder {
232 public:
233 NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
234 OperandMapping* tensor_mapping,
235 DequantizeMapping* dequantize_mapping,
236 ANeuralNetworksModel* nn_model)
237 : nnapi_(nnapi),
238 context_(context),
239 operand_mapping_(tensor_mapping),
240 dequantize_mapping_(dequantize_mapping),
241 nn_model_(nn_model) {}
242
243 TfLiteStatus AddScalarInt32Operand(int32_t value) {
244 return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
245 }
246
247 TfLiteStatus AddScalarFloat32Operand(float value) {
248 return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
249 }
250
251 TfLiteStatus AddVectorInt32Operand(const int32_t* values,
252 uint32_t num_values) {
253 return AddVectorOperand<int32_t>(values, num_values,
254 ANEURALNETWORKS_TENSOR_INT32);
255 }
256
257 TfLiteStatus AddVectorFloat32Operand(const float* values,
258 uint32_t num_values) {
259 return AddVectorOperand<float>(values, num_values,
260 ANEURALNETWORKS_TENSOR_FLOAT32);
261 }
262
263 TfLiteStatus AddPoolingParams(void* data) {
264 auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
265 AddScalarInt32Operand(builtin->padding);
266 AddScalarInt32Operand(builtin->stride_width);
267 AddScalarInt32Operand(builtin->stride_height);
268 AddScalarInt32Operand(builtin->filter_width);
269 AddScalarInt32Operand(builtin->filter_height);
270 AddScalarInt32Operand(builtin->activation);
271 return kTfLiteOk;
272 }
273
274 TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op) {
275 return AddTensor(tensor_index, hybrid_op, &augmented_inputs_);
276 }
277
278 TfLiteStatus AddTensorOutput(int tensor_index) {
279 return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_);
280 }
281
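// Adds an extra float32 output tensor whose `dimension_count` dimensions are
// all left unspecified (0). Used for outputs NNAPI requires but TFLite does
// not expose, e.g. the LSTM scratch buffer.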
282 TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) {
283 std::vector<uint32_t> dims(dimension_count, 0);
284 return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr);
285 }
286
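// Registers the TFLite state tensor `tensor_index` as an additional float32
// NN API output so the updated state can be written back after each
// invocation (see model_state_outputs_ / model_state_tfl_inputs_).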
287 TfLiteStatus AddStateFloat32Tensor(int tensor_index,
288 int* ann_tensor_index_out) {
289 TfLiteTensor* tensor = &context_->tensors[tensor_index];
290 return AddFloat32OutputTensor(
291 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
292 ann_tensor_index_out);
293 }
294
295 // Adds a Dequantize operator and replaces the input tensor index with the
296 // dequantized version. If a dequantized version of the tensor already
297 // exists then it is not added again.
298 TfLiteStatus AddDequantize(int nn_input_index, int lite_index,
299 TfLiteType dequantized_type) {
300 const int ann_index = operand_mapping_->lite_index_to_ann(lite_index);
301 int dequantized_ann_index =
302 dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type);
303
304 if (dequantized_ann_index == -1) {
305 // The dequantized version does not exist yet, it has to be added: a new
306 // Dequantize operation is added, yielding a new tensor.
307 const TfLiteTensor& tensor = context_->tensors[lite_index];
308 ANeuralNetworksOperandType operand_type{
309 dequantized_type, static_cast<uint32_t>(tensor.dims->size),
310 reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0};
311 RETURN_TFLITE_ERROR_IF_NN_ERROR(
312 context_,
313 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
314 dequantized_ann_index = operand_mapping_->add_new_non_tensor_operand();
315
316 // Add Dequantize operation.
317 const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)};
318 const uint32_t dequantize_output[1] = {
319 static_cast<uint32_t>(dequantized_ann_index)};
320 RETURN_TFLITE_ERROR_IF_NN_ERROR(
321 context_, nnapi_->ANeuralNetworksModel_addOperation(
322 nn_model_, ANEURALNETWORKS_DEQUANTIZE, 1,
323 dequantize_input, 1, dequantize_output));
324 dequantize_mapping_->Add(ann_index, dequantized_type,
325 dequantized_ann_index);
326 }
327
328 // The input for the original operation is modified so that the operation
329 // now uses the dequantized tensor as input.
330 augmented_inputs_[nn_input_index] = dequantized_ann_index;
331
332 return kTfLiteOk;
333 }
334
335 // Finish emitting the op (of type `type`) into the NN API.
336 TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type) {
337 // Actually add a NN API operation
338 RETURN_TFLITE_ERROR_IF_NN_ERROR(
339 context_,
340 nnapi_->ANeuralNetworksModel_addOperation(
341 nn_model_, type, static_cast<uint32_t>(augmented_inputs_.size()),
342 augmented_inputs_.data(),
343 static_cast<uint32_t>(augmented_outputs_.size()),
344 augmented_outputs_.data()));
345 augmented_inputs_.clear();
346 augmented_outputs_.clear();
347 return kTfLiteOk;
348 }
349
350 private:
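// Adds a scalar operand of NN type `nn_type`, sets its constant value and
// appends it to the current op's inputs.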
351 template <typename T>
352 TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
353 ANeuralNetworksOperandType operand_type{.type = nn_type};
354 RETURN_TFLITE_ERROR_IF_NN_ERROR(
355 context_,
356 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
357 const int ann_index = operand_mapping_->add_new_non_tensor_operand();
358 RETURN_TFLITE_ERROR_IF_NN_ERROR(
359 context_, nnapi_->ANeuralNetworksModel_setOperandValue(
360 nn_model_, ann_index, &value, sizeof(T)));
361 augmented_inputs_.push_back(ann_index);
362 return kTfLiteOk;
363 }
364
365 template <typename T>
366 TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
367 int32_t nn_type) {
368 ANeuralNetworksOperandType operand_type{
369 .type = nn_type, .dimensionCount = 1, .dimensions = &num_values};
370
371 RETURN_TFLITE_ERROR_IF_NN_ERROR(
372 context_,
373 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
374
375 const int ann_index = operand_mapping_->add_new_non_tensor_operand();
376 RETURN_TFLITE_ERROR_IF_NN_ERROR(
377 context_, nnapi_->ANeuralNetworksModel_setOperandValue(
378 nn_model_, ann_index, values, sizeof(T) * num_values));
379 augmented_inputs_.push_back(ann_index);
380 return kTfLiteOk;
381 }
382
383 TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count,
384 const uint32_t* dimension_data,
385 int* ann_index_out) {
386 ANeuralNetworksOperandType operand_type{
387 .type = ANEURALNETWORKS_TENSOR_FLOAT32,
388 .dimensionCount = dimension_count,
389 .dimensions = dimension_data,
390 };
391 RETURN_TFLITE_ERROR_IF_NN_ERROR(
392 context_,
393 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
394 const int ann_index = operand_mapping_->add_new_non_tensor_operand();
395 augmented_outputs_.push_back(ann_index);
396 if (ann_index_out) *ann_index_out = ann_index;
397 return kTfLiteOk;
398 }
399
400 // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`.
401 // This returns the NN API tensor index corresponding to the created tensor.
402 // If another caller previously created a NN API tensor for `tensor_index`
403 // then the existing one is returned.
404 TfLiteStatus AddTensor(int tensor_index, bool hybrid_op,
405 std::vector<uint32_t>* indices) {
406 int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
407 if (ann_tensor_index != -1) {
408 indices->push_back(ann_tensor_index);
409 return kTfLiteOk;
410 }
411 // Allocate a new tensor index
412 ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
413
414 // Parameters needed for new type.
415 int32_t nn_type = 0;
416 float scale = 0.0f;
417 int32_t zeroPoint = 0;
418 TfLiteTensor* tensor = &context_->tensors[tensor_index];
419 TfLiteType tensor_type = tensor->type;
420 if (hybrid_op && (tensor_type == kTfLiteUInt8)) {
421 // For legacy reasons, UINT8 weights in hybrid operators are actually INT8
422 // values and should be interpreted as such.
423 tensor_type = kTfLiteInt8;
424 }
425 switch (tensor_type) {
426 case kTfLiteNoType:
427 // Tensors added during initialization of Ops don't have a type yet and
428 // should not be registered with the NNAPI.
429 indices->push_back(-1);
430 return kTfLiteOk;
431 case kTfLiteFloat32:
432 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
433 break;
434 case kTfLiteUInt8:
435 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
436 scale = tensor->params.scale;
437 zeroPoint = tensor->params.zero_point;
438 if (scale == 0) {
439 // TENSOR_QUANT8_ASYMM with zero scale is not valid in NNAPI.
440 scale = 1;
441 }
442 break;
443 case kTfLiteInt8:
444 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
445 scale = tensor->params.scale;
446 break;
447 case kTfLiteInt32:
448 nn_type = ANEURALNETWORKS_TENSOR_INT32;
449 scale = tensor->params.scale;
450 zeroPoint = tensor->params.zero_point;
451 break;
452 default:
453 context_->ReportError(context_, "Logic error in NN API Delegate.\n");
454 return kTfLiteError;
455 }
456
457 ANeuralNetworksOperandType operand_type{
458 nn_type, static_cast<uint32_t>(tensor->dims->size),
459 reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
460 RETURN_TFLITE_ERROR_IF_NN_ERROR(
461 context_,
462 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
463
464 if (tensor->allocation_type == kTfLiteMmapRo) {
465 // TODO(b/80630405): Use NNAPIAllocation.
466 RETURN_TFLITE_ERROR_IF_NN_ERROR(
467 context_,
468 nnapi_->ANeuralNetworksModel_setOperandValue(
469 nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes));
470 }
471
472 indices->push_back(ann_tensor_index);
473 return kTfLiteOk;
474 }
475
476 // Access to NNAPI.
477 const NnApi* const nnapi_;
478
479 // TfLiteContext for error handling.
480 TfLiteContext* const context_;
481
482 // Tracks relationship between indices.
483 OperandMapping* const operand_mapping_;
484
485 // Keeps mapping of ANN quantized tensor and float data type to equivalent
486 // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map
487 // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert
488 // tensor #4 to a FLOAT32 tensor.
489 DequantizeMapping* const dequantize_mapping_;
490
491 // The NNAPI model.
492 ANeuralNetworksModel* const nn_model_;
493
494 // Inputs and outputs for the current op. These are augmented in the sense
495 // that NN API uses operands for all arguments, not just tensors, unlike
496 // TensorFlow Lite.
497 std::vector<uint32_t> augmented_inputs_;
498 std::vector<uint32_t> augmented_outputs_;
499 };
500
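// Arguments handed to a MappingFn: everything a per-op mapping lambda needs
// to emit the NN API operands for one TFLite node.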
501 struct NNAPIOpMappingArgs {
502 TfLiteContext* context;
503 NNAPIOpBuilder* builder;
504 TfLiteNode* node;
505 std::vector<int>* model_state_outputs;
506 std::vector<int>* model_state_tfl_inputs;
507 };
508
509 // Mapping function simply returning the operation type without adding any
510 // additional parameters.
511 template <ANeuralNetworksOperationType OperationType>
512 ANeuralNetworksOperationType BasicMappingFn(
513 const NNAPIOpMappingArgs& mapping_args) {
514 return OperationType;
515 }
516
517 // The kernel that represents the node subset of TF Lite being run on NN API.
518 class NNAPIDelegateKernel {
519 public:
520 NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }
521
522 typedef ANeuralNetworksOperationType (*MappingFn)(
523 const NNAPIOpMappingArgs& mapping_args);
524
525 // Return a function that knows how to translate a node into its operands
526 // when called. You can use this function to see if a node is supported
527 // (i.e. that MappingFn is not nullptr).
528 static MappingFn Map(const TfLiteContext* context, int builtin_code,
529 int version, int android_sdk_version,
530 const TfLiteNode* node) {
531 switch (builtin_code) {
532 case kTfLiteBuiltinAdd:
533 if (version == 1) {
534 return [](const NNAPIOpMappingArgs& mapping_args)
535 -> ANeuralNetworksOperationType {
536 auto builtin = reinterpret_cast<TfLiteAddParams*>(
537 mapping_args.node->builtin_data);
538 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
539 return ANEURALNETWORKS_ADD;
540 };
541 }
542 break;
543 case kTfLiteBuiltinMul:
544 if (version == 1) {
545 return [](const NNAPIOpMappingArgs& mapping_args)
546 -> ANeuralNetworksOperationType {
547 auto builtin = reinterpret_cast<TfLiteMulParams*>(
548 mapping_args.node->builtin_data);
549 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
550 return ANEURALNETWORKS_MUL;
551 };
552 }
553 break;
554 case kTfLiteBuiltinAveragePool2d:
555 if (version == 1) {
556 return [](const NNAPIOpMappingArgs& mapping_args)
557 -> ANeuralNetworksOperationType {
558 mapping_args.builder->AddPoolingParams(
559 mapping_args.node->builtin_data);
560 return ANEURALNETWORKS_AVERAGE_POOL_2D;
561 };
562 }
563 break;
564 case kTfLiteBuiltinMaxPool2d:
565 if (version == 1) {
566 return [](const NNAPIOpMappingArgs& mapping_args)
567 -> ANeuralNetworksOperationType {
568 mapping_args.builder->AddPoolingParams(
569 mapping_args.node->builtin_data);
570 return ANEURALNETWORKS_MAX_POOL_2D;
571 };
572 }
573 break;
574 case kTfLiteBuiltinL2Pool2d:
575 if (version == 1) {
576 return [](const NNAPIOpMappingArgs& mapping_args)
577 -> ANeuralNetworksOperationType {
578 mapping_args.builder->AddPoolingParams(
579 mapping_args.node->builtin_data);
580 return ANEURALNETWORKS_L2_POOL_2D;
581 };
582 }
583 break;
584 case kTfLiteBuiltinConv2d:
585 if (version == 1) {
586 if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
587 IsHybridOperator(context, builtin_code, node)) {
588 // Hybrid operators not supported before NNAPI 1.2.
589 return nullptr;
590 }
591 auto builtin =
592 reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
593 if (builtin->dilation_width_factor != 1 ||
594 builtin->dilation_height_factor != 1 || node->inputs->size != 3) {
595 // NNAPI does not support dilated Conv2D.
596 return nullptr;
597 }
598 return [](const NNAPIOpMappingArgs& mapping_args)
599 -> ANeuralNetworksOperationType {
600 auto builtin = reinterpret_cast<TfLiteConvParams*>(
601 mapping_args.node->builtin_data);
602 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
603 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
604 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
605 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
606 return ANEURALNETWORKS_CONV_2D;
607 };
608 }
609 break;
610 case kTfLiteBuiltinDepthwiseConv2d:
611 if (version == 1) {
612 return [](const NNAPIOpMappingArgs& mapping_args)
613 -> ANeuralNetworksOperationType {
614 auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
615 mapping_args.node->builtin_data);
616 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
617 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
618 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
619 mapping_args.builder->AddScalarInt32Operand(
620 builtin->depth_multiplier);
621 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
622 return ANEURALNETWORKS_DEPTHWISE_CONV_2D;
623 };
624 }
625 break;
626 case kTfLiteBuiltinFullyConnected:
627 if (version == 1) {
628 if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
629 IsHybridOperator(context, builtin_code, node)) {
630 // Hybrid operators not supported before NNAPI 1.2.
631 return nullptr;
632 }
633 return [](const NNAPIOpMappingArgs& mapping_args)
634 -> ANeuralNetworksOperationType {
635 auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
636 mapping_args.node->builtin_data);
637 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
638 return ANEURALNETWORKS_FULLY_CONNECTED;
639 };
640 }
641 break;
642 case kTfLiteBuiltinSoftmax:
643 if (version == 1) {
644 return [](const NNAPIOpMappingArgs& mapping_args)
645 -> ANeuralNetworksOperationType {
646 auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(
647 mapping_args.node->builtin_data);
648 mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
649 return ANEURALNETWORKS_SOFTMAX;
650 };
651 }
652 break;
653 case kTfLiteBuiltinReshape:
654 if (version == 1 && node->inputs->size == 2) {
655 return BasicMappingFn<ANEURALNETWORKS_RESHAPE>;
656 }
657 break;
658 case kTfLiteBuiltinSqueeze:
659 if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) {
660 return [](const NNAPIOpMappingArgs& mapping_args)
661 -> ANeuralNetworksOperationType {
662 auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
663 mapping_args.node->builtin_data);
664 // Note that we add the squeeze dimensions even if the dimensions
665 // were unspecified (empty), as NNAPI requires the operand.
666 mapping_args.builder->AddVectorInt32Operand(
667 builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr,
668 static_cast<uint32_t>(builtin->num_squeeze_dims));
669 return ANEURALNETWORKS_SQUEEZE;
670 };
671 }
672 break;
673 case kTfLiteBuiltinL2Normalization: {
674 auto builtin =
675 reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
676 if (builtin->activation == kTfLiteActNone) {
677 return BasicMappingFn<ANEURALNETWORKS_L2_NORMALIZATION>;
678 }
679 break;
680 }
681 case kTfLiteBuiltinLocalResponseNormalization:
682 if (version == 1) {
683 return [](const NNAPIOpMappingArgs& mapping_args)
684 -> ANeuralNetworksOperationType {
685 auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
686 mapping_args.node->builtin_data);
687 mapping_args.builder->AddScalarInt32Operand(builtin->radius);
688 mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
689 mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
690 mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
691 return ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
692 };
693 }
694 break;
695 case kTfLiteBuiltinLshProjection:
696 if (version == 1) {
697 // NNAPI does not support sparse projection correctly (b/111751836).
698 if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
699 ->type == kTfLiteLshProjectionSparse) {
700 return nullptr;
701 }
702 return [](const NNAPIOpMappingArgs& mapping_args)
703 -> ANeuralNetworksOperationType {
704 auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
705 mapping_args.node->builtin_data);
706 mapping_args.builder->AddScalarInt32Operand(builtin->type);
707 return ANEURALNETWORKS_LSH_PROJECTION;
708 };
709 }
710 break;
711 case kTfLiteBuiltinConcatenation:
712 if (version == 1 &&
713 reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
714 ->activation == kTfLiteActNone) {
715 if (context->tensors[node->inputs->data[0]].type == kTfLiteUInt8 &&
716 android_sdk_version < kMinSdkVersionForNNAPI12) {
717 // NNAPI 1.0 and 1.1 only support concatenating quantized tensors with the
718 // same scale and zero point.
719 auto first_param = context->tensors[node->inputs->data[0]].params;
720 for (int i = 1; i < node->inputs->size; i++) {
721 auto curr_param = context->tensors[node->inputs->data[i]].params;
722 if (curr_param.scale != first_param.scale ||
723 curr_param.zero_point != first_param.zero_point) {
724 return nullptr;
725 }
726 }
727 }
728 return [](const NNAPIOpMappingArgs& mapping_args)
729 -> ANeuralNetworksOperationType {
730 auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
731 mapping_args.node->builtin_data);
732 mapping_args.builder->AddScalarInt32Operand(builtin->axis);
733 return ANEURALNETWORKS_CONCATENATION;
734 };
735 }
736 break;
737 case kTfLiteBuiltinDequantize:
738 if (version == 1 || version == 2) {
739 const auto& input = context->tensors[node->inputs->data[0]];
740 const auto zero_point = input.params.zero_point;
741 // NN API supports int8 type since version 1.2 but only for symmetric
742 // quantization.
743 if (input.type == kTfLiteInt8 &&
744 (zero_point != 0 ||
745 android_sdk_version < kMinSdkVersionForNNAPI12)) {
746 return nullptr;
747 }
748 return BasicMappingFn<ANEURALNETWORKS_DEQUANTIZE>;
749 }
750 break;
751 case kTfLiteBuiltinFloor:
752 if (version == 1) {
753 return BasicMappingFn<ANEURALNETWORKS_FLOOR>;
754 }
755 break;
756 case kTfLiteBuiltinRelu:
757 if (version == 1) {
758 return BasicMappingFn<ANEURALNETWORKS_RELU>;
759 }
760 break;
761 case kTfLiteBuiltinReluN1To1:
762 if (version == 1) {
763 return BasicMappingFn<ANEURALNETWORKS_RELU1>;
764 }
765 break;
766 case kTfLiteBuiltinRelu6:
767 if (version == 1) {
768 return BasicMappingFn<ANEURALNETWORKS_RELU6>;
769 }
770 break;
771 case kTfLiteBuiltinLogistic:
772 if (version == 1) {
773 return BasicMappingFn<ANEURALNETWORKS_LOGISTIC>;
774 }
775 break;
776 case kTfLiteBuiltinTanh:
777 // TODO(miaowang): add additional checks for the parameters.
778 if (version == 1 &&
779 context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
780 // NNAPI only supports float tanh.
781 return BasicMappingFn<ANEURALNETWORKS_TANH>;
782 }
783 break;
784 case kTfLiteBuiltinSub:
785 if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
786 context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
787 // NNAPI only supports float sub.
788 return [](const NNAPIOpMappingArgs& mapping_args)
789 -> ANeuralNetworksOperationType {
790 auto builtin = reinterpret_cast<TfLiteSubParams*>(
791 mapping_args.node->builtin_data);
792 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
793 return ANEURALNETWORKS_SUB;
794 };
795 }
796 break;
797 case kTfLiteBuiltinDiv:
798 if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
799 context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
800 // NNAPI only supports float div.
801 return [](const NNAPIOpMappingArgs& mapping_args)
802 -> ANeuralNetworksOperationType {
803 auto builtin = reinterpret_cast<TfLiteDivParams*>(
804 mapping_args.node->builtin_data);
805 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
806 return ANEURALNETWORKS_DIV;
807 };
808 }
809 break;
810 case kTfLiteBuiltinPad:
811 if (version == 1 && node->inputs->size == 2 &&
812 (android_sdk_version >= kMinSdkVersionForNNAPI11) &&
813 (context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 ||
814 android_sdk_version >= kMinSdkVersionForNNAPI12)) {
815 // NNAPI does not support specifying the padding value.
816 // Before 1.2, NNAPI pads physical zero for quantized tensors, so only
817 // delegate float pad to NNAPI. NNAPI 1.2 onwards pads with
818 // zero-point, so delegate quantized pad as well.
819 return BasicMappingFn<ANEURALNETWORKS_PAD>;
820 }
821 break;
822 case kTfLiteBuiltinSpaceToBatchNd:
823 if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) {
824 return BasicMappingFn<ANEURALNETWORKS_SPACE_TO_BATCH_ND>;
825 }
826 break;
827 case kTfLiteBuiltinStridedSlice:
828 if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) {
829 return [](const NNAPIOpMappingArgs& mapping_args)
830 -> ANeuralNetworksOperationType {
831 auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
832 mapping_args.node->builtin_data);
833 mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask);
834 mapping_args.builder->AddScalarInt32Operand(builtin->end_mask);
835 mapping_args.builder->AddScalarInt32Operand(
836 builtin->shrink_axis_mask);
837 return ANEURALNETWORKS_STRIDED_SLICE;
838 };
839 }
840 break;
841 case kTfLiteBuiltinTranspose:
842 // Note that the permutation input tensor value dictates the output
843 // dimensions.
844 // TODO(b/110888333): Support dynamically-sized tensors in delegates.
845 if ((version == 1) &&
846 (android_sdk_version >= kMinSdkVersionForNNAPI11) &&
847 (node->inputs->size > 1) &&
848 (context->tensors[node->inputs->data[1]].allocation_type ==
849 kTfLiteMmapRo)) {
850 return BasicMappingFn<ANEURALNETWORKS_TRANSPOSE>;
851 }
852 break;
853 case kTfLiteBuiltinRnn:
854 // NNAPI only supports float32 weights.
855 if (version == 1 && node->inputs->size == 5 &&
856 context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
857 kTfLiteFloat32) {
858 return [](const NNAPIOpMappingArgs& mapping_args)
859 -> ANeuralNetworksOperationType {
860 // NNAPI needs both state_in and state_out.
861 int ann_index;
862 mapping_args.builder->AddStateFloat32Tensor(
863 mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
864 &ann_index);
865 mapping_args.model_state_outputs->push_back(ann_index);
866 mapping_args.model_state_tfl_inputs->push_back(
867 mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
868 auto builtin = reinterpret_cast<TfLiteRNNParams*>(
869 mapping_args.node->builtin_data);
870 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
871 return ANEURALNETWORKS_RNN;
872 };
873 }
874 break;
875 case kTfLiteBuiltinSvdf:
876 // NNAPI only supports float32 weights.
877 // Only delegate to NNAPI 1.1+, as SVDF does not support rank > 1 on 1.0.
878 if (version == 1 && node->inputs->size == 5 &&
879 android_sdk_version >= kMinSdkVersionForNNAPI11 &&
880 context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
881 .type == kTfLiteFloat32) {
882 return [](const NNAPIOpMappingArgs& mapping_args)
883 -> ANeuralNetworksOperationType {
884 // NNAPI needs both state_in and state_out.
885 int ann_index;
886 mapping_args.builder->AddStateFloat32Tensor(
887 mapping_args.node->inputs
888 ->data[/*kInputActivationStateTensor*/ 4],
889 &ann_index);
890 mapping_args.model_state_outputs->push_back(ann_index);
891 mapping_args.model_state_tfl_inputs->push_back(
892 mapping_args.node->inputs
893 ->data[/*kInputActivationStateTensor*/ 4]);
894
895 auto builtin = reinterpret_cast<TfLiteSVDFParams*>(
896 mapping_args.node->builtin_data);
897 mapping_args.builder->AddScalarInt32Operand(builtin->rank);
898 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
899 return ANEURALNETWORKS_SVDF;
900 };
901 }
902 break;
903 case kTfLiteBuiltinLstm:
904 // NNAPI only supports float32 weights.
905 // Only delegate to NNAPI 1.1+, as 1.0 has a bug for optional tensors
906 // which would affect LSTM.
907 // TODO(miaowang): add loggings to indicate why the op is rejected.
908 if (version == 1 && node->inputs->size == 20 &&
909 android_sdk_version >= kMinSdkVersionForNNAPI11 &&
910 context->tensors[node->inputs
911 ->data[/*kInputToOutputWeightsTensor*/ 4]]
912 .type == kTfLiteFloat32) {
913 return [](const NNAPIOpMappingArgs& mapping_args)
914 -> ANeuralNetworksOperationType {
915 auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
916 mapping_args.node->builtin_data);
917 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
918 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
919 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
920
921 // The current NNAPI implementation requires the scratch_buffer as an
922 // output.
923 mapping_args.builder->AddAdditionalFloat32OutputTensor(2);
924
925 // NNAPI needs both state_in and state_out for cell_state and
926 // output_state.
927 int ann_index;
928 mapping_args.builder->AddStateFloat32Tensor(
929 mapping_args.node->inputs
930 ->data[/*kInputActivationStateTensor*/ 18],
931 &ann_index);
932 mapping_args.model_state_outputs->push_back(ann_index);
933 mapping_args.model_state_tfl_inputs->push_back(
934 mapping_args.node->inputs
935 ->data[/*kInputActivationStateTensor*/ 18]);
936 mapping_args.builder->AddStateFloat32Tensor(
937 mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
938 &ann_index);
939 mapping_args.model_state_outputs->push_back(ann_index);
940 mapping_args.model_state_tfl_inputs->push_back(
941 mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);
942
943 return ANEURALNETWORKS_LSTM;
944 };
945 }
946 break;
947 case kTfLiteBuiltinMean:
948 // NNAPI does not support generating a scalar as output for MEAN.
949 if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
950 context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 &&
951 context->tensors[node->outputs->data[0]].dims->size > 0) {
952 return [](const NNAPIOpMappingArgs& mapping_args)
953 -> ANeuralNetworksOperationType {
954 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
955 mapping_args.node->builtin_data);
956 int32_t keep_dims = 0;
957 if (builtin->keep_dims) keep_dims = 1;
958 mapping_args.builder->AddScalarInt32Operand(keep_dims);
959 return ANEURALNETWORKS_MEAN;
960 };
961 }
962 break;
963 case kTfLiteBuiltinEmbeddingLookup:
964 // NNAPI only supports float32 values.
965 if (version == 1 &&
966 context->tensors[node->inputs->data[1]].type == kTfLiteFloat32) {
967 return BasicMappingFn<ANEURALNETWORKS_EMBEDDING_LOOKUP>;
968 }
969 break;
970 case kTfLiteBuiltinHashtableLookup:
971 // NNAPI only supports float32 output.
972 if (version == 1 &&
973 context->tensors[node->outputs->data[0]].type == kTfLiteFloat32) {
974 return BasicMappingFn<ANEURALNETWORKS_HASHTABLE_LOOKUP>;
975 }
976 break;
977 default:
978 // All other operators are not mapped.
979 return nullptr;
980 }
981 return nullptr;
982 }
983
984 // Initialize the kernel (a NN model).
985 TfLiteStatus Init(TfLiteContext* context,
986 const TfLiteDelegateParams* params) {
987 for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
988 nodes_.push_back(node_index);
989 }
990
991 if (params->delegate->data_ != nullptr) {
992 // The user specified an accelerator to use.
993 const char* device_name_ptr = reinterpret_cast<const char*>(params->delegate->data_);
994 std::string device_name(device_name_ptr);
995 uint32_t numDevices = 0;
996 RETURN_TFLITE_ERROR_IF_NN_ERROR(
997 context, nnapi_->ANeuralNetworks_getDeviceCount(&numDevices));
998
999 for (uint32_t i = 0; i < numDevices; i++) {
1000 ANeuralNetworksDevice* device = nullptr;
1001 const char* buffer = nullptr;
1002 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1003 context, nnapi_->ANeuralNetworks_getDevice(i, &device));
1004 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1005 context, nnapi_->ANeuralNetworksDevice_getName(device, &buffer));
1006 if (device_name.compare(buffer) == 0) {
1007 nnapi_device_ = device;
1008 break;
1009 }
1010 }
1011 if (nnapi_device_ == nullptr) {
1012 context->ReportError(context, "Could not find the specified accelerator.");
1013 return kTfLiteError;
1014 }
1015 }
1016
1017 if (!nn_model_) {
1018 ANeuralNetworksModel* model = nullptr;
1019 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1020 context, nnapi_->ANeuralNetworksModel_create(&model));
1021 nn_model_.reset(model);
1022
1023 TF_LITE_ENSURE_STATUS(
1024 BuildGraph(context, params->input_tensors, params->output_tensors));
1025 }
1026
1027 if (!nn_compilation_) {
1028 ANeuralNetworksCompilation* compilation = nullptr;
1029 if (nnapi_device_ != nullptr) {
1030 // Compile for the selected accelerator.
1031 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1032 context, nnapi_->ANeuralNetworksCompilation_createForDevices(nn_model_.get(),
1033 &nnapi_device_, 1,
1034 &compilation));
1035 } else {
1036 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1037 context, nnapi_->ANeuralNetworksCompilation_create(nn_model_.get(),
1038 &compilation));
1039 }
1040 const int finish_result =
1041 nnapi_->ANeuralNetworksCompilation_finish(compilation);
1042 if (finish_result != ANEURALNETWORKS_NO_ERROR) {
1043 nnapi_->ANeuralNetworksCompilation_free(compilation);
1044 compilation = nullptr;
1045 }
1046 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result);
1047 nn_compilation_.reset(compilation);
1048 }
1049 return kTfLiteOk;
1050 }
1051
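// Runs one inference: copies inputs into the shared memory pool, binds
// inputs/outputs, executes (asynchronously before NNAPI 1.2, synchronously
// from 1.2 onwards) and copies the results back into the TFLite tensors.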
1052 TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) {
1053 ANeuralNetworksExecution* execution = nullptr;
1054 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1055 context, nnapi_->ANeuralNetworksExecution_create(nn_compilation_.get(),
1056 &execution));
1057 std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
1058 execution_unique_ptr(execution);
1059
1060 // Set the input tensor buffers. Note: we access TfLite tensors using
1061 // absolute indices, but the NN API indexes inputs by relative order.
1062 int relative_input_index = 0;
1063
1064 size_t input_offset = 0;
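// Inputs are packed back to back in the shared memory region, each padded
// to kDefaultByteAlignmentForNNAPI, mirroring the sizes accumulated in
// BuildGraph() when the input pool was allocated.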
1065 for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
1066 if (absolute_input_index == kOptionalTensor) {
1067 continue;
1068 }
1069 TfLiteTensor* tensor = &context->tensors[absolute_input_index];
1070 // TODO(miaowang): make sure the delegation works with dequantized weights
1071 // as intermediate tensors.
1072 if (tensor->allocation_type != kTfLiteMmapRo) {
1073 // copy data to pre-allocated shared memory.
1074 memcpy(nn_input_memory_->get_data_ptr() + input_offset,
1075 tensor->data.raw, tensor->bytes);
1076 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1077 context,
1078 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
1079 execution, relative_input_index, nullptr,
1080 nn_input_memory_->get_handle(), input_offset, tensor->bytes));
1081 input_offset += tensor->bytes;
1082 input_offset += getNumPaddingBytes(tensor->bytes);
1083 relative_input_index++;
1084 }
1085 }
1086
1087 // Set the output tensor buffers.
1088 int relative_output_index = 0;
1089 size_t output_offset = 0;
1090 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
1091 TfLiteTensor* tensor = &context->tensors[output_index];
1092 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1093 context,
1094 nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
1095 execution, relative_output_index, nullptr,
1096 nn_output_memory_->get_handle(), output_offset, tensor->bytes));
1097 output_offset += tensor->bytes;
1098 output_offset += getNumPaddingBytes(tensor->bytes);
1099 relative_output_index++;
1100 }
1101
1102 // The state_out of the previous invocation needs to be mapped to the
1103 // state_in of the current invocation.
1104 for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
1105 int state_tensor_idx = model_state_tfl_inputs_[i];
1106 TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
1107 // Here we are using a deep copy for state_in tensors so that we are not
1108 // reading and writing into the same buffer during an invocation.
1109 // TODO(110369471): using double shared buffer to minimize the copies.
1110 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1111 context, nnapi_->ANeuralNetworksExecution_setOutput(
1112 execution, relative_output_index, nullptr,
1113 tensor->data.raw, tensor->bytes));
1114 relative_output_index++;
1115 }
1116 // Invoke ANN in blocking fashion.
1117 if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
1118 ANeuralNetworksEvent* event = nullptr;
1119 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1120 context,
1121 nnapi_->ANeuralNetworksExecution_startCompute(execution, &event));
1122 const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
1123 nnapi_->ANeuralNetworksEvent_free(event);
1124 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result);
1125 } else {
1126 // Use synchronous execution for NNAPI 1.2+.
1127 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1128 context, nnapi_->ANeuralNetworksExecution_compute(execution));
1129 }
1130
1131 // copy results from shared memory to the destination.
1132 output_offset = 0;
1133 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
1134 TfLiteTensor* tensor = &context->tensors[output_index];
1135 memcpy(tensor->data.raw,
1136 nn_output_memory_->get_data_ptr() + output_offset, tensor->bytes);
1137 output_offset += tensor->bytes;
1138 output_offset += getNumPaddingBytes(tensor->bytes);
1139 }
1140
1141 return kTfLiteOk;
1142 }
1143
1144 // NN API Delegate Registration (the pseudo kernel that will invoke NN
1145 // API node subsets)
1146 static const TfLiteRegistration registration;
1147
1148 private:
1149 // Access to NNApi.
1150 const NnApi* nnapi_;
1151 // ANN device handle.
1152 ANeuralNetworksDevice* nnapi_device_ = nullptr;
1153 // ANN API state.
1154 std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
1155 std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
1156 nn_compilation_;
1157 // Node indices that this delegate is responsible for. Indices here
1158 // index into the nodes array in the TfLiteContext.
1159 std::vector<int> nodes_;
1160 // Track indices we use
1161 OperandMapping operand_mapping_;
1162
1163 std::vector<int> model_state_outputs_;
1164 std::vector<int> model_state_tfl_inputs_;
1165
1166 std::unique_ptr<NNMemory> nn_input_memory_;
1167 std::unique_ptr<NNMemory> nn_output_memory_;
1168
1169 void AddDequantizeOperatorsWhereNeeded(const TfLiteContext* context,
1170 int builtin_code,
1171 const TfLiteNode* node,
1172 NNAPIOpBuilder* builder) {
1173 // Depending on the operator and the input data format, Dequantize
1174 // operators may need to be added. For example, when the input is
1175 // floating-point but the weights are quantized, the weights will first be
1176 // dequantized to the same format as the input before being passed to the
1177 // operator.
1178
1179 // The tensor determining whether the inputs should be floating-point.
1180 int input_tensor_index = -1;
1181 std::vector<int> inputs_to_potentially_dequantize;
1182
1183 switch (builtin_code) {
1184 case kTfLiteBuiltinConv2d:
1185 case kTfLiteBuiltinFullyConnected: {
1186 input_tensor_index = 0;
1187 // Weights and bias are inputs #1 and #2 respectively and may require
1188 // dequantization.
1189 inputs_to_potentially_dequantize = {1, 2};
1190 break;
1191 }
1192 default:
1193 return;
1194 }
1195
1196 int tensor_id = node->inputs->data[input_tensor_index];
1197 if (tensor_id < 0) return;
1198
1199 // Nothing to do if the input is not floating-point.
1200 if (!IsFloat(context->tensors[tensor_id].type)) return;
1201
1202 for (int i : inputs_to_potentially_dequantize) {
1203 tensor_id = node->inputs->data[i];
1204 if (tensor_id < 0) continue; // Ignore optional input.
1205
1206 const TfLiteType type = context->tensors[tensor_id].type;
1207 // Nothing to do for this tensor if it's not quantized.
1208 if (type != kTfLiteUInt8) continue;
1209
1210 // Insert Dequantize operator if it hasn't been done already and change
1211 // the node's input accordingly.
1212 builder->AddDequantize(i, node->inputs->data[i], type);
1213 }
1214 }
1215
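// Walks the delegated nodes, registering their tensors as NN API operands
// and emitting one NN API operation per TFLite node.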
1216 TfLiteStatus AddOpsAndTensors(TfLiteContext* context) {
1217 DequantizeMapping dequantize_mapping;
1218 // The operand builder allows creating a single op. It is created outside
1219 // the for loop to avoid reallocating the vectors.
1220 NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
1221 &dequantize_mapping, nn_model_.get());
1222 // Add Tensors.
1223 for (auto node_index : nodes_) {
1224 // Obtain the op and registration.
1225 TfLiteNode* node;
1226 TfLiteRegistration* reg;
1227 TF_LITE_ENSURE_STATUS(
1228 context->GetNodeAndRegistration(context, node_index, &node, &reg));
1229
1230 const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
1231
1232 // Map inputs to NN API tensor indices.
1233 for (auto input_index : TfLiteIntArrayView(node->inputs)) {
1234 if (input_index == kOptionalTensor &&
1235 (reg->builtin_code == kTfLiteBuiltinLstm ||
1236 reg->builtin_code == kTfLiteBuiltinSvdf)) {
1237 // Properly handle the optional tensor for LSTM and SVDF;
1238 // currently only float32 is supported.
1239 // TODO(miaowang): make sure this is also able to handle quantized
1240 // tensor when supported by NNAPI.
1241 TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
1242 } else {
1243 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
1244 }
1245 }
1246 // Get op type and operands
1247 int nn_op_type = Map(
1248 context, reg->builtin_code, reg->version, nnapi_->android_sdk_version,
1249 node)({context, &builder, node, &model_state_outputs_,
1250 &model_state_tfl_inputs_});
1251 // Map outputs to NN API tensor indices.
1252 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
1253 TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(output_index));
1254 }
1255
1256 // Dequantize operators may have to be added in case inputs are to be
1257 // floating-point.
1258 AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
1259 &builder);
1260
1261 builder.FinalizeAddOperation(nn_op_type);
1262 }
1263 return kTfLiteOk;
1264 }
1265
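// Declares the model inputs/outputs, finalizes the NN API model and
// allocates the shared memory pools sized from the (padded) tensor sizes.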
1266 TfLiteStatus BuildGraph(TfLiteContext* context,
1267 const TfLiteIntArray* input_tensors,
1268 const TfLiteIntArray* output_tensors) {
1269 // Build the ops and tensors.
1270 TF_LITE_ENSURE_STATUS(AddOpsAndTensors(context));
1271 // Map input and output tensor indices to ANN
1272 std::vector<uint32_t> inputs;
1273 inputs.reserve(input_tensors->size);
1274 std::vector<uint32_t> outputs;
1275 outputs.reserve(output_tensors->size);
1276
1277 size_t total_input_byte_size = 0;
1278 // Map the TensorFlow Lite input and output tensor indices to ann_indices.
1279 for (int i : TfLiteIntArrayView(input_tensors)) {
1280 // Constant tensors are not NNAPI inputs.
1281 if (i != kOptionalTensor &&
1282 context->tensors[i].allocation_type != kTfLiteMmapRo) {
1283 inputs.push_back(operand_mapping_.lite_index_to_ann(i));
1284 total_input_byte_size += context->tensors[i].bytes;
1285 total_input_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
1286 }
1287 }
1288
1289 size_t total_output_byte_size = 0;
1290 for (int i : TfLiteIntArrayView(output_tensors)) {
1291 outputs.push_back(operand_mapping_.lite_index_to_ann(i));
1292 total_output_byte_size += context->tensors[i].bytes;
1293 total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
1294 }
1295
1296 // Add state output tensors as model outputs.
1297 for (int i : model_state_outputs_) {
1298 outputs.push_back(i);
1299 }
1300
1301 // Tell ANN to declare inputs/outputs
1302 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1303 context, nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
1304 nn_model_.get(), inputs.size(), inputs.data(),
1305 outputs.size(), outputs.data()));
1306
1307 // Set relaxed computation mode for fp32 if possible.
1308 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
1309 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1310 context,
1311 nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
1312 nn_model_.get(), context->allow_fp32_relax_to_fp16));
1313 }
1314
1315 // Finalize the model
1316 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1317 context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()));
1318
1319 // Create shared memory pool for inputs and outputs.
1320 nn_input_memory_.reset(
1321 new NNMemory(nnapi_, "input_pool", total_input_byte_size));
1322 nn_output_memory_.reset(
1323 new NNMemory(nnapi_, "output_pool", total_output_byte_size));
1324
1325 return kTfLiteOk;
1326 }
1327 };
1328
1329 const TfLiteRegistration NNAPIDelegateKernel::registration = {
1330 .init = [](TfLiteContext* context, const char* buffer,
1331 size_t length) -> void* {
1332 const TfLiteDelegateParams* params =
1333 reinterpret_cast<const TfLiteDelegateParams*>(buffer);
1334 NNAPIDelegateKernel* kernel_state = new NNAPIDelegateKernel;
1335 kernel_state->Init(context, params);
1336 return kernel_state;
1337 },
1338
1339 .free = [](TfLiteContext* context, void* buffer) -> void {
1340 delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
1341 },
1342
1343 .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
1344 NNAPIDelegateKernel* state =
1345 reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
1346 return state->nn_compilation_ == nullptr ? kTfLiteError : kTfLiteOk;
1347 },
1348
1349 .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
1350 NNAPIDelegateKernel* state =
1351 reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
1352 return state->Invoke(context, node);
1353 },
1354
1355 .profiling_string = nullptr,
1356 .builtin_code = kTfLiteBuiltinDelegate,
1357 };
1358
1359 } // namespace
1360
1361 // Return a NN API Delegate struct that can check for support of ops.
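// A minimal usage sketch (assumes the standard TFLite C++ Interpreter API):
//   std::unique_ptr<tflite::Interpreter> interpreter = ...;
//   interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate(nullptr));
// Passing a device name restricts compilation to that NNAPI accelerator;
// passing nullptr lets NNAPI pick the device(s).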
1362 TfLiteDelegate* NnApiDelegate(const char* device_name) {
1363 static TfLiteDelegate delegate = {
1364 .data_ = nullptr,
1365 .Prepare = [](TfLiteContext* context,
1366 TfLiteDelegate* delegate) -> TfLiteStatus {
1367 // Do not check nodes_ if NN API is unavailable.
1368 const NnApi* nnapi = NnApiImplementation();
1369 if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
1370 !nnapi->nnapi_exists) {
1371 return kTfLiteOk;
1372 }
1373 // For NNAPI 1.2+, check if there is any accelerator available.
1374 // If not, don't delegate to NNAPI's CPU reference implementation.
1375 if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
1376 uint32_t device_count = 0;
1377 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1378 context, nnapi->ANeuralNetworks_getDeviceCount(&device_count));
1379 // Any available accelerator will make the device_count larger than 1.
1380 // More sophisticated check and whitelisting can be added later.
1381 if (device_count <= 1) {
1382 return kTfLiteOk;
1383 }
1384 }
1385 // Allocate one element in vector already since TensorFlow Lite uses
1386 // the first value as the number of nodes. The actual value will be set
1387 // later, after the vector has been filled.
1388 std::vector<int> supported_nodes(1);
1389 // We don't care about all nodes_; we only care about the ones in the
1390 // current plan.
1391 TfLiteIntArray* plan;
1392 TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
1393
1394 int android_sdk_version = NnApiImplementation()->android_sdk_version;
1395 // Check for every node if it is supported
1396 // TODO(b/80625235): Fix this to do more careful checking of versioning.
1397 for (int node_index : TfLiteIntArrayView(plan)) {
1398 TfLiteNode* node;
1399 TfLiteRegistration* registration;
1400 TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
1401 context, node_index, &node, &registration));
1402 if (NNAPIDelegateKernel::Map(context, registration->builtin_code,
1403 registration->version,
1404 android_sdk_version, node)) {
1405 supported_nodes.push_back(node_index);
1406 }
1407 }
1408 // First element in vector must be the number of actual nodes.
1409 supported_nodes[0] = supported_nodes.size() - 1;
1410
1411 // Request TFLite to partition the graph and make kernels
1412 // for each independent node subset a new NNAPIDelegateKernel.
1413 return context->ReplaceNodeSubsetsWithDelegateKernels(
1414 context, NNAPIDelegateKernel::registration,
1415 reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()),
1416 delegate);
1417 },
1418
1419 .CopyFromBufferHandle = nullptr,
1420 .CopyToBufferHandle = nullptr,
1421 .FreeBufferHandle = nullptr,
1422 .flags = kTfLiteDelegateFlagsNone,
1423 };
1424 static std::string device_name_;
1425 if (device_name == nullptr) {
1426 device_name_.clear();
1427 delegate.data_ = nullptr;
1428 } else {
1429 device_name_ = device_name;
1430 delegate.data_ = (void *) device_name_.c_str();
1431 }
1432 return &delegate;
1433 }
1434
1435 } // namespace tflite
1436