/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstdarg>
#include <cstring>
#include <iostream>
#include <memory>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/context_util.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

#ifdef __ANDROID__
#include <sys/system_properties.h>
#endif
#if defined __ANDROID__ || defined __unix__
#include <sys/mman.h>
#include <unistd.h>
#endif

namespace tflite {
namespace {

// TODO(b/80621585): Consider printing error string, but don't for now to
// minimize binary size.
#define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code)                        \
  do {                                                                        \
    const auto _code = (code);                                                \
    if (_code != ANEURALNETWORKS_NO_ERROR) {                                  \
      context->ReportError(context, "NN API returned error (%d, line %d).\n", \
                           _code, __LINE__);                                  \
      return kTfLiteError;                                                    \
    }                                                                         \
  } while (0)

namespace {

bool IsFloat(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
      return true;
    default:
      return false;
  }
}

bool IsQuantized(TfLiteType type) {
  switch (type) {
    case kTfLiteUInt8:
    case kTfLiteInt8:
    case kTfLiteInt16:
      return true;
    default:
      return false;
  }
}

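// Returns true if the operator is a "hybrid" op, i.e. it takes floating-point
// inputs but has quantized weights. Only CONV_2D and FULLY_CONNECTED are
// checked here; every other builtin is reported as non-hybrid.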
bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
                      const TfLiteNode* node) {
  switch (builtin_code) {
    case kTfLiteBuiltinConv2d:
    case kTfLiteBuiltinFullyConnected: {
      const int input_id = node->inputs->data[0];
      const int filter_id = node->inputs->data[1];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType filter_type = context->tensors[filter_id].type;
      return IsFloat(input_type) && IsQuantized(filter_type);
    }
    default:
      return false;
  }
}

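// Android API levels that introduced NNAPI 1.0 (API 27), NNAPI 1.1 (API 28)
// and NNAPI 1.2 (API 29).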
constexpr int32_t kMinSdkVersionForNNAPI = 27;
constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
constexpr size_t kDefaultByteAlignmentForNNAPI = 16;

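// Returns the number of padding bytes needed to round `byte_size` up to a
// multiple of kDefaultByteAlignmentForNNAPI (0 if it is already aligned).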
static size_t getNumPaddingBytes(size_t byte_size) {
  size_t num_padding_bytes = 0;
  if (byte_size % kDefaultByteAlignmentForNNAPI) {
    num_padding_bytes = kDefaultByteAlignmentForNNAPI -
                        (byte_size % kDefaultByteAlignmentForNNAPI);
  }
  return num_padding_bytes;
}
}  // namespace

// RAII NN API Model Destructor for use with std::unique_ptr
struct NNFreeModel {
  void operator()(ANeuralNetworksModel* model) {
    NnApiImplementation()->ANeuralNetworksModel_free(model);
  }
};
// RAII NN API Compilation Destructor for use with std::unique_ptr
struct NNFreeCompilation {
  void operator()(ANeuralNetworksCompilation* model) {
    NnApiImplementation()->ANeuralNetworksCompilation_free(model);
  }
};

// RAII NN API Execution Destructor for use with std::unique_ptr
struct NNFreeExecution {
  void operator()(ANeuralNetworksExecution* execution) {
    NnApiImplementation()->ANeuralNetworksExecution_free(execution);
  }
};

// Manage NNAPI shared memory handle
class NNMemory {
 public:
#if defined __ANDROID__ || defined __unix__
  NNMemory(const NnApi* nnapi, const char* name, size_t size) {
    nnapi_ = nnapi;
    byte_size_ = size;
    fd_ = nnapi_->ASharedMemory_create(name, size);
    data_ptr_ = reinterpret_cast<uint8_t*>(
        mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
    nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
                                               fd_, 0, &nn_memory_handle_);
  }
#else
  NNMemory(const NnApi* /*nnapi*/, const char* /*name*/, size_t /*size*/) {}
#endif

  ~NNMemory() {
#if defined __ANDROID__ || defined __unix__
    if (data_ptr_) {
      munmap(data_ptr_, byte_size_);
    }
    if (nn_memory_handle_) {
      nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
    }
    if (fd_ > 0) close(fd_);
#endif
  }

  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
  uint8_t* get_data_ptr() { return data_ptr_; }

 private:
#if defined __ANDROID__ || defined __unix__
  const NnApi* nnapi_;
  int fd_ = 0;
  size_t byte_size_ = 0;
#endif
  uint8_t* data_ptr_ = nullptr;
  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
};

// Track tensor indices to NN API tensor indices mapping.
class OperandMapping {
 public:
  // Given a TFLite index return the ANN index. If it doesn't exist
  // return -1.
  int lite_index_to_ann(int index) const {
    if (index < lite_tensor_to_ann_tensor_.size())
      return lite_tensor_to_ann_tensor_[index];
    else
      return -1;
  }

  // NN API uses non-tensor operands instead of structs. This creates one
  // and returns the index. It uses a std::vector and resizes it as needed,
  // keeping -1 for unmapped values. Intermediate tensors likely will not
  // be mapped.
  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }

  // Add a new mapping from `tflite_index` and return the NN API tensor index.
  int add_new_ann_tensor_index(int tflite_index) {
    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
    }
    int new_tensor_index = next_ann_tensor_index_++;
    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
    return new_tensor_index;
  }

 private:
  // Next index of ann tensor
  int next_ann_tensor_index_ = 0;

  // Mapping from lite index. Use a std::vector for speed and code size
  // rather than a map.
  std::vector<int> lite_tensor_to_ann_tensor_;
};

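// Tracks which (quantized ANN tensor index, float type) pairs already have a
// dequantized ANN tensor, so that the corresponding DEQUANTIZE operation is
// only added to the model once.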
class DequantizeMapping {
 public:
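  // Given the ANN index of a quantized tensor and the desired floating-point
  // type, returns the ANN index of its dequantized counterpart, or -1 if no
  // such tensor has been created yet.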
  int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
    for (const auto& element : mapping_) {
      if (ann_index == std::get<0>(element) && type == std::get<1>(element)) {
        return std::get<2>(element);
      }
    }
    return -1;
  }

  void Add(int ann_index, TfLiteType type, int dequantized_ann_index) {
    // This assumes it is not already mapped.
    mapping_.emplace_back(ann_index, type, dequantized_ann_index);
  }

 private:
  // Each tuple specifies the ANN (quantized) tensor index, the desired
  // floating-point type and the matching ANN (dequantized) tensor index. This
  // could use a map but instead std::vector is used to keep code size lower.
  std::vector<std::tuple<int, TfLiteType, int>> mapping_;
};

// Abstract builder for building an op in the NN API graph. This handles
// the disparity between TFLite and NN API operand types. NN API has singular
// operands for both tensors and parameters, and TFLite separates the two.
class NNAPIOpBuilder {
 public:
  NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
                 OperandMapping* tensor_mapping,
                 DequantizeMapping* dequantize_mapping,
                 ANeuralNetworksModel* nn_model)
      : nnapi_(nnapi),
        context_(context),
        operand_mapping_(tensor_mapping),
        dequantize_mapping_(dequantize_mapping),
        nn_model_(nn_model) {}

  TfLiteStatus AddScalarInt32Operand(int32_t value) {
    return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
  }

  TfLiteStatus AddScalarFloat32Operand(float value) {
    return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
  }

  TfLiteStatus AddVectorInt32Operand(const int32_t* values,
                                     uint32_t num_values) {
    return AddVectorOperand<int32_t>(values, num_values,
                                     ANEURALNETWORKS_TENSOR_INT32);
  }

  TfLiteStatus AddVectorFloat32Operand(const float* values,
                                       uint32_t num_values) {
    return AddVectorOperand<float>(values, num_values,
                                   ANEURALNETWORKS_TENSOR_FLOAT32);
  }

  TfLiteStatus AddPoolingParams(void* data) {
    auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
    AddScalarInt32Operand(builtin->padding);
    AddScalarInt32Operand(builtin->stride_width);
    AddScalarInt32Operand(builtin->stride_height);
    AddScalarInt32Operand(builtin->filter_width);
    AddScalarInt32Operand(builtin->filter_height);
    AddScalarInt32Operand(builtin->activation);
    return kTfLiteOk;
  }

  TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op) {
    return AddTensor(tensor_index, hybrid_op, &augmented_inputs_);
  }

  TfLiteStatus AddTensorOutput(int tensor_index) {
    return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_);
  }

  TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) {
    std::vector<uint32_t> dims(dimension_count, 0);
    return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr);
  }

  TfLiteStatus AddStateFloat32Tensor(int tensor_index,
                                     int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddFloat32OutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ann_tensor_index_out);
  }

  // Adds a Dequantize operator and replaces the input tensor index with the
  // index of its dequantized version. If the dequantized tensor already
  // exists then it is not added again.
  TfLiteStatus AddDequantize(int nn_input_index, int lite_index,
                             TfLiteType dequantized_type) {
    const int ann_index = operand_mapping_->lite_index_to_ann(lite_index);
    int dequantized_ann_index =
        dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type);

    if (dequantized_ann_index == -1) {
      // The dequantized version does not exist yet, it has to be added: a new
      // Dequantize operation is added, yielding a new tensor.
      const TfLiteTensor& tensor = context_->tensors[lite_index];
      ANeuralNetworksOperandType operand_type{
          dequantized_type, static_cast<uint32_t>(tensor.dims->size),
          reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0};
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context_,
          nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
      dequantized_ann_index = operand_mapping_->add_new_non_tensor_operand();

      // Add Dequantize operation.
      const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)};
      const uint32_t dequantize_output[1] = {
          static_cast<uint32_t>(dequantized_ann_index)};
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context_, nnapi_->ANeuralNetworksModel_addOperation(
                        nn_model_, ANEURALNETWORKS_DEQUANTIZE, 1,
                        dequantize_input, 1, dequantize_output));
      dequantize_mapping_->Add(ann_index, dequantized_type,
                               dequantized_ann_index);
    }

    // The input for the original operation is modified so that the operation
    // now uses the dequantized tensor as input.
    augmented_inputs_[nn_input_index] = dequantized_ann_index;

    return kTfLiteOk;
  }

  // Finish emitting the op (of type `type`) into the NN API.
  TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type) {
    // Actually add a NN API operation
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperation(
            nn_model_, type, static_cast<uint32_t>(augmented_inputs_.size()),
            augmented_inputs_.data(),
            static_cast<uint32_t>(augmented_outputs_.size()),
            augmented_outputs_.data()));
    augmented_inputs_.clear();
    augmented_outputs_.clear();
    return kTfLiteOk;
  }

 private:
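  // Adds a scalar operand of NN API type `nn_type`, sets its value to `value`
  // and appends it to the inputs of the op being built.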
  template <typename T>
  TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
    ANeuralNetworksOperandType operand_type{.type = nn_type};
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
    const int ann_index = operand_mapping_->add_new_non_tensor_operand();
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_, nnapi_->ANeuralNetworksModel_setOperandValue(
                      nn_model_, ann_index, &value, sizeof(T)));
    augmented_inputs_.push_back(ann_index);
    return kTfLiteOk;
  }

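  // Adds a constant 1-D operand of NN API type `nn_type` with `num_values`
  // elements copied from `values`, and appends it to the inputs of the op
  // being built.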
  template <typename T>
  TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
                                int32_t nn_type) {
    ANeuralNetworksOperandType operand_type{
        .type = nn_type, .dimensionCount = 1, .dimensions = &num_values};

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));

    const int ann_index = operand_mapping_->add_new_non_tensor_operand();
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_, nnapi_->ANeuralNetworksModel_setOperandValue(
                      nn_model_, ann_index, values, sizeof(T) * num_values));
    augmented_inputs_.push_back(ann_index);
    return kTfLiteOk;
  }

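  // Adds a float32 tensor operand with the given dimensions, appends it to
  // the outputs of the op being built and, if `ann_index_out` is non-null,
  // returns the new ANN index through it.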
  TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count,
                                      const uint32_t* dimension_data,
                                      int* ann_index_out) {
    ANeuralNetworksOperandType operand_type{
        .type = ANEURALNETWORKS_TENSOR_FLOAT32,
        .dimensionCount = dimension_count,
        .dimensions = dimension_data,
    };
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
    const int ann_index = operand_mapping_->add_new_non_tensor_operand();
    augmented_outputs_.push_back(ann_index);
    if (ann_index_out) *ann_index_out = ann_index;
    return kTfLiteOk;
  }

  // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`.
  // This returns the NN API tensor index corresponding to the created tensor.
  // If another caller previously created a NN API tensor for `tensor_index`
  // then the existing one is returned.
  TfLiteStatus AddTensor(int tensor_index, bool hybrid_op,
                         std::vector<uint32_t>* indices) {
    int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
    if (ann_tensor_index != -1) {
      indices->push_back(ann_tensor_index);
      return kTfLiteOk;
    }
    // Allocate a new tensor index
    ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);

    // Parameters needed for new type.
    int32_t nn_type = 0;
    float scale = 0.0f;
    int32_t zeroPoint = 0;
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    TfLiteType tensor_type = tensor->type;
    if (hybrid_op && (tensor_type == kTfLiteUInt8)) {
      // For legacy reasons, UINT8 weights in hybrid operators are actually
      // INT8 values and should be interpreted as such.
      tensor_type = kTfLiteInt8;
    }
    switch (tensor_type) {
      case kTfLiteNoType:
        // Tensors added during initialization of Ops don't have a type yet and
        // should not be registered with the NNAPI.
        indices->push_back(-1);
        return kTfLiteOk;
      case kTfLiteFloat32:
        nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
        break;
      case kTfLiteUInt8:
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        if (scale == 0) {
          // TENSOR_QUANT8_ASYMM with zero scale is not valid in NNAPI.
          scale = 1;
        }
        break;
      case kTfLiteInt8:
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
        scale = tensor->params.scale;
        break;
      case kTfLiteInt32:
        nn_type = ANEURALNETWORKS_TENSOR_INT32;
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        break;
      default:
        context_->ReportError(context_, "Logic error in NN API Delegate.\n");
        return kTfLiteError;
    }

    ANeuralNetworksOperandType operand_type{
        nn_type, static_cast<uint32_t>(tensor->dims->size),
        reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));

    if (tensor->allocation_type == kTfLiteMmapRo) {
      // TODO(b/80630405): Use NNAPIAllocation.
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context_,
          nnapi_->ANeuralNetworksModel_setOperandValue(
              nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes));
    }

    indices->push_back(ann_tensor_index);
    return kTfLiteOk;
  }

  // Access to NNAPI.
  const NnApi* const nnapi_;

  // TfLiteContext for error handling.
  TfLiteContext* const context_;

  // Tracks relationship between indices.
  OperandMapping* const operand_mapping_;

  // Keeps mapping of ANN quantized tensor and float data type to equivalent
  // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map
  // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert
  // tensor #4 to a FLOAT32 tensor.
  DequantizeMapping* const dequantize_mapping_;

  // The NNAPI model.
  ANeuralNetworksModel* const nn_model_;

  // Inputs and outputs for the current op. These are augmented in the sense
  // that NN API uses operands for all arguments, not just tensors, unlike
  // TensorFlow Lite.
  std::vector<uint32_t> augmented_inputs_;
  std::vector<uint32_t> augmented_outputs_;
};

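// Arguments passed to a mapping function: everything it needs to translate a
// TF Lite node into the additional NN API operands of the matching operation.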
struct NNAPIOpMappingArgs {
  TfLiteContext* context;
  NNAPIOpBuilder* builder;
  TfLiteNode* node;
  std::vector<int>* model_state_outputs;
  std::vector<int>* model_state_tfl_inputs;
};

// Mapping function simply returning the operation type without adding any
// additional parameter.
template <ANeuralNetworksOperationType OperationType>
ANeuralNetworksOperationType BasicMappingFn(
    const NNAPIOpMappingArgs& mapping_args) {
  return OperationType;
}

// The kernel that represents the node subset of TF Lite being run on NN API.
class NNAPIDelegateKernel {
 public:
  NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }

  typedef ANeuralNetworksOperationType (*MappingFn)(
      const NNAPIOpMappingArgs& mapping_args);

  // Return a function that knows how to translate a node into its operands
  // when called. You can use this function to see if a node is supported
  // (i.e. the returned MappingFn is not nullptr).
  static MappingFn Map(const TfLiteContext* context, int builtin_code,
                       int version, int android_sdk_version,
                       const TfLiteNode* node) {
    switch (builtin_code) {
      case kTfLiteBuiltinAdd:
        if (version == 1) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteAddParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            return ANEURALNETWORKS_ADD;
          };
        }
        break;
      case kTfLiteBuiltinMul:
        if (version == 1) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteMulParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            return ANEURALNETWORKS_MUL;
          };
        }
        break;
      case kTfLiteBuiltinAveragePool2d:
        if (version == 1) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            mapping_args.builder->AddPoolingParams(
                mapping_args.node->builtin_data);
            return ANEURALNETWORKS_AVERAGE_POOL_2D;
          };
        }
        break;
      case kTfLiteBuiltinMaxPool2d:
        if (version == 1) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            mapping_args.builder->AddPoolingParams(
                mapping_args.node->builtin_data);
            return ANEURALNETWORKS_MAX_POOL_2D;
          };
        }
        break;
      case kTfLiteBuiltinL2Pool2d:
        if (version == 1) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            mapping_args.builder->AddPoolingParams(
                mapping_args.node->builtin_data);
            return ANEURALNETWORKS_L2_POOL_2D;
          };
        }
        break;
      case kTfLiteBuiltinConv2d:
        if (version == 1) {
          if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
              IsHybridOperator(context, builtin_code, node)) {
            // Hybrid operators not supported before NNAPI 1.2.
            return nullptr;
          }
          auto builtin =
              reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
          if (builtin->dilation_width_factor != 1 ||
              builtin->dilation_height_factor != 1 || node->inputs->size != 3) {
            // NNAPI does not support dilated Conv2D.
            return nullptr;
          }
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteConvParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->padding);
            mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
            mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            return ANEURALNETWORKS_CONV_2D;
          };
        }
        break;
      case kTfLiteBuiltinDepthwiseConv2d:
        if (version == 1) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->padding);
            mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
            mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
            mapping_args.builder->AddScalarInt32Operand(
                builtin->depth_multiplier);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            return ANEURALNETWORKS_DEPTHWISE_CONV_2D;
          };
        }
        break;
      case kTfLiteBuiltinFullyConnected:
        if (version == 1) {
          if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
              IsHybridOperator(context, builtin_code, node)) {
            // Hybrid operators not supported before NNAPI 1.2.
            return nullptr;
          }
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            return ANEURALNETWORKS_FULLY_CONNECTED;
          };
        }
        break;
      case kTfLiteBuiltinSoftmax:
        if (version == 1) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
            return ANEURALNETWORKS_SOFTMAX;
          };
        }
        break;
      case kTfLiteBuiltinReshape:
        if (version == 1 && node->inputs->size == 2) {
          return BasicMappingFn<ANEURALNETWORKS_RESHAPE>;
        }
        break;
      case kTfLiteBuiltinSqueeze:
        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
                mapping_args.node->builtin_data);
            // Note that we add the squeeze dimensions even if the dimensions
            // were unspecified (empty), as NNAPI requires the operand.
            mapping_args.builder->AddVectorInt32Operand(
                builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr,
                static_cast<uint32_t>(builtin->num_squeeze_dims));
            return ANEURALNETWORKS_SQUEEZE;
          };
        }
        break;
      case kTfLiteBuiltinL2Normalization: {
        auto builtin =
            reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
        if (builtin->activation == kTfLiteActNone) {
          return BasicMappingFn<ANEURALNETWORKS_L2_NORMALIZATION>;
        }
        break;
      }
      case kTfLiteBuiltinLocalResponseNormalization:
        if (version == 1) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->radius);
            mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
            mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
            mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
            return ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
          };
        }
        break;
      case kTfLiteBuiltinLshProjection:
        if (version == 1) {
          // NNAPI does not support sparse projection correctly (b/111751836).
          if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
                  ->type == kTfLiteLshProjectionSparse) {
            return nullptr;
          }
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->type);
            return ANEURALNETWORKS_LSH_PROJECTION;
          };
        }
        break;
      case kTfLiteBuiltinConcatenation:
        if (version == 1 &&
            reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
                    ->activation == kTfLiteActNone) {
          if (context->tensors[node->inputs->data[0]].type == kTfLiteUInt8 &&
              android_sdk_version < kMinSdkVersionForNNAPI12) {
            // NNAPI 1.0 and 1.1 only support concatenating quantized tensors
            // of the same scale and offset.
            auto first_param = context->tensors[node->inputs->data[0]].params;
            for (int i = 1; i < node->inputs->size; i++) {
              auto curr_param = context->tensors[node->inputs->data[i]].params;
              if (curr_param.scale != first_param.scale ||
                  curr_param.zero_point != first_param.zero_point) {
                return nullptr;
              }
            }
          }
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->axis);
            return ANEURALNETWORKS_CONCATENATION;
          };
        }
        break;
      case kTfLiteBuiltinDequantize:
        if (version == 1 || version == 2) {
          const auto& input = context->tensors[node->inputs->data[0]];
          const auto zero_point = input.params.zero_point;
          // NN API supports int8 type since version 1.2 but only for symmetric
          // quantization.
          if (input.type == kTfLiteInt8 &&
              (zero_point != 0 ||
               android_sdk_version < kMinSdkVersionForNNAPI12)) {
            return nullptr;
          }
          return BasicMappingFn<ANEURALNETWORKS_DEQUANTIZE>;
        }
        break;
      case kTfLiteBuiltinFloor:
        if (version == 1) {
          return BasicMappingFn<ANEURALNETWORKS_FLOOR>;
        }
        break;
      case kTfLiteBuiltinRelu:
        if (version == 1) {
          return BasicMappingFn<ANEURALNETWORKS_RELU>;
        }
        break;
      case kTfLiteBuiltinReluN1To1:
        if (version == 1) {
          return BasicMappingFn<ANEURALNETWORKS_RELU1>;
        }
        break;
      case kTfLiteBuiltinRelu6:
        if (version == 1) {
          return BasicMappingFn<ANEURALNETWORKS_RELU6>;
        }
        break;
      case kTfLiteBuiltinLogistic:
        if (version == 1) {
          return BasicMappingFn<ANEURALNETWORKS_LOGISTIC>;
        }
        break;
      case kTfLiteBuiltinTanh:
        // TODO(miaowang): add additional checks for the parameters.
        if (version == 1 &&
            context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
          // NNAPI only supports float tanh.
          return BasicMappingFn<ANEURALNETWORKS_TANH>;
        }
        break;
      case kTfLiteBuiltinSub:
        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
            context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
          // NNAPI only supports float sub.
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteSubParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            return ANEURALNETWORKS_SUB;
          };
        }
        break;
      case kTfLiteBuiltinDiv:
        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
            context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
          // NNAPI only supports float div.
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteDivParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            return ANEURALNETWORKS_DIV;
          };
        }
        break;
      case kTfLiteBuiltinPad:
        if (version == 1 && node->inputs->size == 2 &&
            (android_sdk_version >= kMinSdkVersionForNNAPI11) &&
            (context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 ||
             android_sdk_version >= kMinSdkVersionForNNAPI12)) {
          // NNAPI does not support specifying the padding value.
          // Before 1.2, NNAPI pads with physical zeros for quantized tensors,
          // so only delegate float pad to NNAPI. NNAPI 1.2 onwards pads with
          // the zero-point, so delegate quantized pad as well.
          return BasicMappingFn<ANEURALNETWORKS_PAD>;
        }
        break;
      case kTfLiteBuiltinSpaceToBatchNd:
        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) {
          return BasicMappingFn<ANEURALNETWORKS_SPACE_TO_BATCH_ND>;
        }
        break;
      case kTfLiteBuiltinStridedSlice:
        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask);
            mapping_args.builder->AddScalarInt32Operand(builtin->end_mask);
            mapping_args.builder->AddScalarInt32Operand(
                builtin->shrink_axis_mask);
            return ANEURALNETWORKS_STRIDED_SLICE;
          };
        }
        break;
      case kTfLiteBuiltinTranspose:
        // Note that the permutation input tensor value dictates the output
        // dimensions.
        // TODO(b/110888333): Support dynamically-sized tensors in delegates.
        if ((version == 1) &&
            (android_sdk_version >= kMinSdkVersionForNNAPI11) &&
            (node->inputs->size > 1) &&
            (context->tensors[node->inputs->data[1]].allocation_type ==
             kTfLiteMmapRo)) {
          return BasicMappingFn<ANEURALNETWORKS_TRANSPOSE>;
        }
        break;
      case kTfLiteBuiltinRnn:
        // NNAPI only supports float32 weights.
        if (version == 1 && node->inputs->size == 5 &&
            context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
                kTfLiteFloat32) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            // NNAPI needs both state_in and state_out.
            int ann_index;
            mapping_args.builder->AddStateFloat32Tensor(
                mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
                &ann_index);
            mapping_args.model_state_outputs->push_back(ann_index);
            mapping_args.model_state_tfl_inputs->push_back(
                mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
            auto builtin = reinterpret_cast<TfLiteRNNParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            return ANEURALNETWORKS_RNN;
          };
        }
        break;
      case kTfLiteBuiltinSvdf:
        // NNAPI only supports float32 weights.
        // Only delegate to NNAPI 1.1, as SVDF does not support rank > 1 on 1.0.
        if (version == 1 && node->inputs->size == 5 &&
            android_sdk_version >= kMinSdkVersionForNNAPI11 &&
            context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
                    .type == kTfLiteFloat32) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            // NNAPI needs both state_in and state_out.
            int ann_index;
            mapping_args.builder->AddStateFloat32Tensor(
                mapping_args.node->inputs
                    ->data[/*kInputActivationStateTensor*/ 4],
                &ann_index);
            mapping_args.model_state_outputs->push_back(ann_index);
            mapping_args.model_state_tfl_inputs->push_back(
                mapping_args.node->inputs
                    ->data[/*kInputActivationStateTensor*/ 4]);

            auto builtin = reinterpret_cast<TfLiteSVDFParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->rank);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            return ANEURALNETWORKS_SVDF;
          };
        }
        break;
      case kTfLiteBuiltinLstm:
        // NNAPI only supports float32 weights.
        // Only delegate to NNAPI 1.1, as 1.0 has a bug for optional tensors
        // which would affect LSTM.
        // TODO(miaowang): add logging to indicate why the op is rejected.
        if (version == 1 && node->inputs->size == 20 &&
            android_sdk_version >= kMinSdkVersionForNNAPI11 &&
            context->tensors[node->inputs
                                 ->data[/*kInputToOutputWeightsTensor*/ 4]]
                    .type == kTfLiteFloat32) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
                mapping_args.node->builtin_data);
            mapping_args.builder->AddScalarInt32Operand(builtin->activation);
            mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
            mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);

            // The current NNAPI implementation requires the scratch_buffer as
            // an output.
            mapping_args.builder->AddAdditionalFloat32OutputTensor(2);

            // NNAPI needs both state_in and state_out for cell_state and
            // output_state.
            int ann_index;
            mapping_args.builder->AddStateFloat32Tensor(
                mapping_args.node->inputs
                    ->data[/*kInputActivationStateTensor*/ 18],
                &ann_index);
            mapping_args.model_state_outputs->push_back(ann_index);
            mapping_args.model_state_tfl_inputs->push_back(
                mapping_args.node->inputs
                    ->data[/*kInputActivationStateTensor*/ 18]);
            mapping_args.builder->AddStateFloat32Tensor(
                mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
                &ann_index);
            mapping_args.model_state_outputs->push_back(ann_index);
            mapping_args.model_state_tfl_inputs->push_back(
                mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);

            return ANEURALNETWORKS_LSTM;
          };
        }
        break;
      case kTfLiteBuiltinMean:
        // NNAPI does not support generating a scalar as output for MEAN.
        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
            context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 &&
            context->tensors[node->outputs->data[0]].dims->size > 0) {
          return [](const NNAPIOpMappingArgs& mapping_args)
                     -> ANeuralNetworksOperationType {
            auto builtin = reinterpret_cast<TfLiteReducerParams*>(
                mapping_args.node->builtin_data);
            int32_t keep_dims = 0;
            if (builtin->keep_dims) keep_dims = 1;
            mapping_args.builder->AddScalarInt32Operand(keep_dims);
            return ANEURALNETWORKS_MEAN;
          };
        }
        break;
      case kTfLiteBuiltinEmbeddingLookup:
        // NNAPI only supports float32 values.
        if (version == 1 &&
            context->tensors[node->inputs->data[1]].type == kTfLiteFloat32) {
          return BasicMappingFn<ANEURALNETWORKS_EMBEDDING_LOOKUP>;
        }
        break;
      case kTfLiteBuiltinHashtableLookup:
        // NNAPI only supports float32 output.
        if (version == 1 &&
            context->tensors[node->outputs->data[0]].type == kTfLiteFloat32) {
          return BasicMappingFn<ANEURALNETWORKS_HASHTABLE_LOOKUP>;
        }
        break;
      default:
        // All other operators are not mapped.
        return nullptr;
    }
    return nullptr;
  }

  // Initialize the kernel (a NN model).
  TfLiteStatus Init(TfLiteContext* context,
                    const TfLiteDelegateParams* params) {
    for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
      nodes_.push_back(node_index);
    }

    if (params->delegate->data_ != nullptr) {
      // The user specified an accelerator to use.
      const char* device_name_ptr =
          reinterpret_cast<const char*>(params->delegate->data_);
      std::string device_name(device_name_ptr);
      uint32_t numDevices = 0;
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi_->ANeuralNetworks_getDeviceCount(&numDevices));

      for (uint32_t i = 0; i < numDevices; i++) {
        ANeuralNetworksDevice* device = nullptr;
        const char* buffer = nullptr;
        RETURN_TFLITE_ERROR_IF_NN_ERROR(
            context, nnapi_->ANeuralNetworks_getDevice(i, &device));
        RETURN_TFLITE_ERROR_IF_NN_ERROR(
            context, nnapi_->ANeuralNetworksDevice_getName(device, &buffer));
        if (device_name.compare(buffer) == 0) {
          nnapi_device_ = device;
          break;
        }
      }
      if (nnapi_device_ == nullptr) {
        context->ReportError(context,
                             "Could not find the specified accelerator.");
        return kTfLiteError;
      }
    }

    if (!nn_model_) {
      ANeuralNetworksModel* model = nullptr;
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi_->ANeuralNetworksModel_create(&model));
      nn_model_.reset(model);

      TF_LITE_ENSURE_STATUS(
          BuildGraph(context, params->input_tensors, params->output_tensors));
    }

    if (!nn_compilation_) {
      ANeuralNetworksCompilation* compilation = nullptr;
      if (nnapi_device_ != nullptr) {
        // Compile for the selected accelerator.
        RETURN_TFLITE_ERROR_IF_NN_ERROR(
            context, nnapi_->ANeuralNetworksCompilation_createForDevices(
                         nn_model_.get(), &nnapi_device_, 1, &compilation));
      } else {
        RETURN_TFLITE_ERROR_IF_NN_ERROR(
            context, nnapi_->ANeuralNetworksCompilation_create(nn_model_.get(),
                                                               &compilation));
      }
      const int finish_result =
          nnapi_->ANeuralNetworksCompilation_finish(compilation);
      if (finish_result != ANEURALNETWORKS_NO_ERROR) {
        nnapi_->ANeuralNetworksCompilation_free(compilation);
        compilation = nullptr;
      }
      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result);
      nn_compilation_.reset(compilation);
    }
    return kTfLiteOk;
  }

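  // Runs inference for the delegated subset of nodes: copies input data into
  // the shared input memory, runs the compiled NN API model (asynchronously
  // before NNAPI 1.2, synchronously from 1.2 on) and copies the results back
  // into the TF Lite output tensors.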
  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) {
    ANeuralNetworksExecution* execution = nullptr;
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context, nnapi_->ANeuralNetworksExecution_create(nn_compilation_.get(),
                                                         &execution));
    std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
        execution_unique_ptr(execution);

    // Set the input tensor buffers. Note: we access TF Lite tensors using
    // absolute indices, but NN API indexes inputs by relative indices.
    int relative_input_index = 0;

    size_t input_offset = 0;
    for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
      if (absolute_input_index == kOptionalTensor) {
        continue;
      }
      TfLiteTensor* tensor = &context->tensors[absolute_input_index];
      // TODO(miaowang): make sure the delegation works with dequantized weights
      // as intermediate tensors.
      if (tensor->allocation_type != kTfLiteMmapRo) {
        // copy data to pre-allocated shared memory.
        memcpy(nn_input_memory_->get_data_ptr() + input_offset,
               tensor->data.raw, tensor->bytes);
        RETURN_TFLITE_ERROR_IF_NN_ERROR(
            context,
            nnapi_->ANeuralNetworksExecution_setInputFromMemory(
                execution, relative_input_index, nullptr,
                nn_input_memory_->get_handle(), input_offset, tensor->bytes));
        input_offset += tensor->bytes;
        input_offset += getNumPaddingBytes(tensor->bytes);
        relative_input_index++;
      }
    }

    // Set the output tensor buffers.
    int relative_output_index = 0;
    size_t output_offset = 0;
    for (auto output_index : TfLiteIntArrayView(node->outputs)) {
      TfLiteTensor* tensor = &context->tensors[output_index];
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context,
          nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
              execution, relative_output_index, nullptr,
              nn_output_memory_->get_handle(), output_offset, tensor->bytes));
      output_offset += tensor->bytes;
      output_offset += getNumPaddingBytes(tensor->bytes);
      relative_output_index++;
    }

    // The state_out of the previous invocation needs to be mapped to the
    // state_in of the current invocation.
    for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
      int state_tensor_idx = model_state_tfl_inputs_[i];
      TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
      // Here we are using a deep copy for state_in tensors so that we are not
      // reading and writing into the same buffer during an invocation.
      // TODO(110369471): use a double shared buffer to minimize the copies.
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi_->ANeuralNetworksExecution_setOutput(
                       execution, relative_output_index, nullptr,
                       tensor->data.raw, tensor->bytes));
      relative_output_index++;
    }
    // Invoke ANN in blocking fashion.
    if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
      ANeuralNetworksEvent* event = nullptr;
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context,
          nnapi_->ANeuralNetworksExecution_startCompute(execution, &event));
      const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
      nnapi_->ANeuralNetworksEvent_free(event);
      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result);
    } else {
      // Use synchronous execution for NNAPI 1.2+.
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi_->ANeuralNetworksExecution_compute(execution));
    }

    // Copy results from shared memory to the destination.
    output_offset = 0;
    for (auto output_index : TfLiteIntArrayView(node->outputs)) {
      TfLiteTensor* tensor = &context->tensors[output_index];
      memcpy(tensor->data.raw,
             nn_output_memory_->get_data_ptr() + output_offset, tensor->bytes);
      output_offset += tensor->bytes;
      output_offset += getNumPaddingBytes(tensor->bytes);
    }

    return kTfLiteOk;
  }

  // NN API Delegate Registration (the pseudo kernel that will invoke NN
  // API node subsets).
  static const TfLiteRegistration registration;

 private:
  // Access to NNApi.
  const NnApi* nnapi_;
  // ANN device handle.
  ANeuralNetworksDevice* nnapi_device_ = nullptr;
  // ANN API state.
  std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
  std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
      nn_compilation_;
  // Node indices that this delegate is responsible for. Indices here
  // index into the nodes array in the TfLiteContext.
  std::vector<int> nodes_;
  // Tracks the mapping between TF Lite tensor indices and NN API operand
  // indices.
  OperandMapping operand_mapping_;

  std::vector<int> model_state_outputs_;
  std::vector<int> model_state_tfl_inputs_;

  std::unique_ptr<NNMemory> nn_input_memory_;
  std::unique_ptr<NNMemory> nn_output_memory_;

  void AddDequantizeOperatorsWhereNeeded(const TfLiteContext* context,
                                         int builtin_code,
                                         const TfLiteNode* node,
                                         NNAPIOpBuilder* builder) {
    // Depending on the operator and the input data format, Dequantize
    // operators may need to be added. For example when the input is
    // floating-point but weights are quantized then the weights will first be
    // dequantized to the same format as the input before being passed to the
    // operator.

    // The tensor determining whether the inputs should be floating-point.
    int input_tensor_index = -1;
    std::vector<int> inputs_to_potentially_dequantize;

    switch (builtin_code) {
      case kTfLiteBuiltinConv2d:
      case kTfLiteBuiltinFullyConnected: {
        input_tensor_index = 0;
        // Weights and bias are inputs #1 and #2 respectively and may require
        // dequantization.
        inputs_to_potentially_dequantize = {1, 2};
        break;
      }
      default:
        return;
    }

    int tensor_id = node->inputs->data[input_tensor_index];
    if (tensor_id < 0) return;

    // Nothing to do if the input is not floating-point.
    if (!IsFloat(context->tensors[tensor_id].type)) return;

    for (int i : inputs_to_potentially_dequantize) {
      tensor_id = node->inputs->data[i];
      if (tensor_id < 0) continue;  // Ignore optional input.

      const TfLiteType type = context->tensors[tensor_id].type;
      // Nothing to do for this tensor if it's not quantized.
      if (type != kTfLiteUInt8) continue;

      // Insert Dequantize operator if it hasn't been done already and change
      // the node's input accordingly.
      builder->AddDequantize(i, node->inputs->data[i], type);
    }
  }

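  // Adds the operations (and the operands they use) for every node delegated
  // to this kernel to the NN API model.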
  TfLiteStatus AddOpsAndTensors(TfLiteContext* context) {
    DequantizeMapping dequantize_mapping;
    // The operand builder allows creating a single op. It is created outside
    // the for loop to avoid reallocating the vectors.
    NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
                           &dequantize_mapping, nn_model_.get());
    // Add Tensors.
    for (auto node_index : nodes_) {
      // Obtain the op and registration.
      TfLiteNode* node;
      TfLiteRegistration* reg;
      TF_LITE_ENSURE_STATUS(
          context->GetNodeAndRegistration(context, node_index, &node, &reg));

      const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);

      // Map inputs to NN API tensor indices.
      for (auto input_index : TfLiteIntArrayView(node->inputs)) {
        if (input_index == kOptionalTensor &&
            (reg->builtin_code == kTfLiteBuiltinLstm ||
             reg->builtin_code == kTfLiteBuiltinSvdf)) {
1237           // properly handle the optional tensor for LSTM and SVDF.
1238           // currently only support float32.
1239           // TODO(miaowang): make sure this is also able to handle quantized
1240           // tensor when supported by NNAPI.
1241           TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
1242         } else {
1243           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
1244         }
1245       }
1246       // Get op type and operands
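      // Map() returns a per-builtin mapping function; invoking it adds the
      // op's parameters as scalar operands through the builder and yields the
      // corresponding ANEURALNETWORKS_* operation type.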
      int nn_op_type = Map(
          context, reg->builtin_code, reg->version, nnapi_->android_sdk_version,
          node)({context, &builder, node, &model_state_outputs_,
                 &model_state_tfl_inputs_});
      // Map outputs to NN API tensor indices.
      for (auto output_index : TfLiteIntArrayView(node->outputs)) {
        TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(output_index));
      }

      // Dequantize operators may have to be added when the op takes
      // floating-point inputs but quantized weights.
      AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
                                        &builder);

      builder.FinalizeAddOperation(nn_op_type);
    }
    return kTfLiteOk;
  }

  TfLiteStatus BuildGraph(TfLiteContext* context,
                          const TfLiteIntArray* input_tensors,
                          const TfLiteIntArray* output_tensors) {
    // Build the ops and tensors.
    TF_LITE_ENSURE_STATUS(AddOpsAndTensors(context));
    // Map input and output tensor indices to ANN operand indices.
    std::vector<uint32_t> inputs;
    inputs.reserve(input_tensors->size);
    std::vector<uint32_t> outputs;
    outputs.reserve(output_tensors->size);

    size_t total_input_byte_size = 0;
    // Map the TensorFlow Lite inputs and outputs to ANN indices.
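    // Each tensor's size is padded via getNumPaddingBytes() up to the next
    // multiple of kDefaultByteAlignmentForNNAPI (16 bytes), so every region of
    // the shared memory pool starts at an aligned offset; e.g. a 100-byte
    // tensor reserves 100 + 12 = 112 bytes.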
    for (int i : TfLiteIntArrayView(input_tensors)) {
      // Constant tensors are not NNAPI inputs.
      if (i != kOptionalTensor &&
          context->tensors[i].allocation_type != kTfLiteMmapRo) {
        inputs.push_back(operand_mapping_.lite_index_to_ann(i));
        total_input_byte_size += context->tensors[i].bytes;
        total_input_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
      }
    }

    size_t total_output_byte_size = 0;
    for (int i : TfLiteIntArrayView(output_tensors)) {
      outputs.push_back(operand_mapping_.lite_index_to_ann(i));
      total_output_byte_size += context->tensors[i].bytes;
      total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
    }

    // Add state output tensors as model outputs.
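    // Note: model_state_outputs_ is filled in by the op builder with ANN
    // operand indices, so no lite_index_to_ann() mapping is needed here.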
    for (int i : model_state_outputs_) {
      outputs.push_back(i);
    }

    // Tell ANN which operands are the model inputs and outputs.
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context, nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
                     nn_model_.get(), inputs.size(), inputs.data(),
                     outputs.size(), outputs.data()));

    // Set relaxed computation mode for fp32 if possible.
    if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context,
          nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
              nn_model_.get(), context->allow_fp32_relax_to_fp16));
    }

    // Finalize the model.
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()));

    // Create shared memory pools for inputs and outputs.
    nn_input_memory_.reset(
        new NNMemory(nnapi_, "input_pool", total_input_byte_size));
    nn_output_memory_.reset(
        new NNMemory(nnapi_, "output_pool", total_output_byte_size));
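    // These pools back the memory that Invoke() hands to NNAPI at per-tensor
    // offsets (assuming the execution binds them via
    // ANeuralNetworksExecution_setInputFromMemory/setOutputFromMemory),
    // avoiding a separate copy for each tensor.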

    return kTfLiteOk;
  }
};

const TfLiteRegistration NNAPIDelegateKernel::registration = {
      .init = [](TfLiteContext* context, const char* buffer,
                 size_t length) -> void* {
        const TfLiteDelegateParams* params =
            reinterpret_cast<const TfLiteDelegateParams*>(buffer);
        NNAPIDelegateKernel* kernel_state = new NNAPIDelegateKernel;
        kernel_state->Init(context, params);
        return kernel_state;
      },

      .free = [](TfLiteContext* context, void* buffer) -> void {
        delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
      },

      .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        NNAPIDelegateKernel* state =
            reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
        return state->nn_compilation_ == nullptr ? kTfLiteError : kTfLiteOk;
      },

      .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        NNAPIDelegateKernel* state =
            reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
        return state->Invoke(context, node);
      },

      .profiling_string = nullptr,
      .builtin_code = kTfLiteBuiltinDelegate,
  };

}  // namespace

// Returns an NN API delegate struct that can check for support of ops.
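// Minimal usage sketch (assuming a tflite::Interpreter named `interpreter`):
//   interpreter->ModifyGraphWithDelegate(NnApiDelegate(/*device_name=*/nullptr));
// The optional device name is stored for accelerator selection on NNAPI 1.2+.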
TfLiteDelegate* NnApiDelegate(const char* device_name) {
  static TfLiteDelegate delegate = {
      .data_ = nullptr,
      .Prepare = [](TfLiteContext* context,
                    TfLiteDelegate* delegate) -> TfLiteStatus {
        // Do not check nodes_ if NN API is unavailable.
        const NnApi* nnapi = NnApiImplementation();
        if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
            !nnapi->nnapi_exists) {
          return kTfLiteOk;
        }
        // For NNAPI 1.2+, check whether any accelerator is available. If none
        // is, do not delegate, to avoid running on NNAPI's CPU reference
        // implementation.
        if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
          uint32_t device_count = 0;
          RETURN_TFLITE_ERROR_IF_NN_ERROR(
              context, nnapi->ANeuralNetworks_getDeviceCount(&device_count));
          // The CPU reference implementation always counts as one device, so
          // any available accelerator makes device_count larger than 1. More
          // sophisticated checks and whitelisting can be added later.
          if (device_count <= 1) {
            return kTfLiteOk;
          }
        }
        // Allocate the first element of the vector up front, since TensorFlow
        // Lite uses it to store the number of nodes. The actual value is set
        // later, once the vector has been filled.
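        // The resulting layout mirrors TfLiteIntArray ({count, node_0,
        // node_1, ...}), which is why the vector's data can be reinterpreted
        // as a TfLiteIntArray when handed back to TFLite below.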
        std::vector<int> supported_nodes(1);
        // We only care about the nodes in the current execution plan, not all
        // of nodes_.
        TfLiteIntArray* plan;
        TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));

        int android_sdk_version = NnApiImplementation()->android_sdk_version;
        // Check every node in the plan for support.
        // TODO(b/80625235): Fix this to do more careful checking of versioning.
        for (int node_index : TfLiteIntArrayView(plan)) {
          TfLiteNode* node;
          TfLiteRegistration* registration;
          TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
              context, node_index, &node, &registration));
          if (NNAPIDelegateKernel::Map(context, registration->builtin_code,
                                       registration->version,
                                       android_sdk_version, node)) {
            supported_nodes.push_back(node_index);
          }
        }
        // The first element of the vector must be the number of supported
        // nodes.
        supported_nodes[0] = supported_nodes.size() - 1;

        // Request TFLite to partition the graph and create a new
        // NNAPIDelegateKernel for each independent node subset.
        return context->ReplaceNodeSubsetsWithDelegateKernels(
            context, NNAPIDelegateKernel::registration,
            reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()),
            delegate);
      },

      .CopyFromBufferHandle = nullptr,
      .CopyToBufferHandle = nullptr,
      .FreeBufferHandle = nullptr,
      .flags = kTfLiteDelegateFlagsNone,
  };
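  // The delegate object is a process-wide static, so the device name from the
  // most recent call wins; it is exposed to the kernel through data_.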
  static std::string device_name_;
  if (device_name == nullptr) {
    device_name_.clear();
    delegate.data_ = nullptr;
  } else {
    device_name_ = device_name;
    delegate.data_ = const_cast<char*>(device_name_.c_str());
  }
  return &delegate;
}

}  // namespace tflite