/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"

#include <algorithm>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"

#ifdef __ANDROID__
#include <sys/system_properties.h>
#endif

#if defined __ANDROID__ || defined __unix__
#define TFLITE_NNAPI_ALLOW_MMAP_SHARING
#include <sys/mman.h>
#include <unistd.h>
#endif

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/context_util.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"
#include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h"
#include "tensorflow/lite/delegates/utils.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"
#include "tensorflow/lite/nnapi/nnapi_util.h"
#include "tensorflow/lite/util.h"
#include "utils/hash/farmhash.h"

namespace tflite {
namespace {

// Returns the enum name corresponding to the given error code if the value
// matches one of the known NNAPI error codes, or a message reporting the
// unknown code otherwise.
// LINT.IfChange(NnApiErrorDescription)
std::string NnApiErrorDescription(int error_code) {
  switch (error_code) {
    case ANEURALNETWORKS_NO_ERROR:
      return "ANEURALNETWORKS_NO_ERROR";
    case ANEURALNETWORKS_OUT_OF_MEMORY:
      return "ANEURALNETWORKS_OUT_OF_MEMORY";
    case ANEURALNETWORKS_INCOMPLETE:
      return "ANEURALNETWORKS_INCOMPLETE";
    case ANEURALNETWORKS_UNEXPECTED_NULL:
      return "ANEURALNETWORKS_UNEXPECTED_NULL";
    case ANEURALNETWORKS_BAD_DATA:
      return "ANEURALNETWORKS_BAD_DATA";
    case ANEURALNETWORKS_OP_FAILED:
      return "ANEURALNETWORKS_OP_FAILED";
    case ANEURALNETWORKS_BAD_STATE:
      return "ANEURALNETWORKS_BAD_STATE";
    case ANEURALNETWORKS_UNMAPPABLE:
      return "ANEURALNETWORKS_UNMAPPABLE";
    case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE:
      return "ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE";
    case ANEURALNETWORKS_UNAVAILABLE_DEVICE:
      return "ANEURALNETWORKS_UNAVAILABLE_DEVICE";
    case ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT:
      return "ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT";
    case ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT:
      return "ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT";
    case ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT:
      return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT";
    case ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT:
      return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT";
    case ANEURALNETWORKS_DEAD_OBJECT:
      return "ANEURALNETWORKS_DEAD_OBJECT";
    default:
      return "Unknown NNAPI error code: " + std::to_string(error_code);
  }
}
// LINT.ThenChange()

#define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code, call_desc, p_errno)  \
  do {                                                                      \
    const auto _code = (code);                                              \
    const auto _call_desc = (call_desc);                                    \
    if (_code != ANEURALNETWORKS_NO_ERROR) {                                \
      const auto error_desc = NnApiErrorDescription(_code);                 \
      TF_LITE_KERNEL_LOG(context,                                           \
                         "NN API returned error %s at line %d while %s.\n", \
                         error_desc.c_str(), __LINE__, _call_desc);         \
      *p_errno = _code;                                                     \
      return kTfLiteError;                                                  \
    }                                                                       \
  } while (0)

#define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(context, code, call_desc, \
                                                   p_tensor, p_errno)        \
  do {                                                                       \
    const auto _code = (code);                                               \
    const auto _call_desc = (call_desc);                                     \
    if (_code != ANEURALNETWORKS_NO_ERROR) {                                 \
      const auto error_desc = NnApiErrorDescription(_code);                  \
      TF_LITE_KERNEL_LOG(context,                                            \
                         "NN API returned error %s at line %d while %s "     \
                         "for tensor '%s'.\n",                               \
                         error_desc.c_str(), __LINE__, _call_desc,           \
                         (p_tensor)->name ? (p_tensor)->name : "no-name");   \
      *p_errno = _code;                                                      \
      return kTfLiteError;                                                   \
    }                                                                        \
  } while (0)
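
// Example usage of the error-checking macros above (illustrative only; the
// exact call sites vary): wrap an NNAPI call so that a failure is logged,
// the NNAPI error code is stored in *nnapi_errno, and kTfLiteError is
// returned from the enclosing function:
//   RETURN_TFLITE_ERROR_IF_NN_ERROR(
//       context, nnapi->ANeuralNetworksModel_finish(nn_model),
//       "finishing the model", nnapi_errno);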

bool IsFloat(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
      return true;
    default:
      return false;
  }
}

bool IsFloatOrUInt8(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8:
      return true;
    default:
      return false;
  }
}

bool IsQuantized(TfLiteType type) {
  switch (type) {
    case kTfLiteUInt8:
    case kTfLiteInt8:
      return true;
    default:
      // kTfLiteInt16 isn't supported as a quantized type yet.
      return false;
  }
}

bool IsInt32(TfLiteType type) {
  switch (type) {
    case kTfLiteInt32:
      return true;
    default:
      return false;
  }
}

bool IsFloatOrQuantized(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8:
    case kTfLiteInt8:
      return true;
    default:
      return false;
  }
}

bool IsFloatOrInt32(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteInt32:
      return true;
    default:
      return false;
  }
}

bool IsFloatQuantizedOrInt32(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8:
    case kTfLiteInt8:
    case kTfLiteInt32:
      return true;
    default:
      return false;
  }
}

bool IsScalarInputSupported(int builtin_code) {
  switch (builtin_code) {
    case kTfLiteBuiltinAdd:
    case kTfLiteBuiltinMul:
    case kTfLiteBuiltinSub:
    case kTfLiteBuiltinDiv:
    case kTfLiteBuiltinEqual:
    case kTfLiteBuiltinNotEqual:
    case kTfLiteBuiltinGreater:
    case kTfLiteBuiltinGreaterEqual:
    case kTfLiteBuiltinLess:
    case kTfLiteBuiltinLessEqual:
    case kTfLiteBuiltinPow:
    case kTfLiteBuiltinMaximum:
    case kTfLiteBuiltinMinimum:
    case kTfLiteBuiltinPrelu:
    case kTfLiteBuiltinLeakyRelu:
      return true;
    default:
      return false;
  }
}

// Check if the operation requires explicit conversion from int8 to uint8
// values.
bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
                        const TfLiteNode* node) {
  const int input_id = node->inputs->data[0];
  const TfLiteType input_type = context->tensors[input_id].type;
  switch (builtin_code) {
    case kTfLiteBuiltinConv2d:
    case kTfLiteBuiltinDepthwiseConv2d:
    case kTfLiteBuiltinFullyConnected: {
      if (input_type == kTfLiteInt8) {
        const int weights_id = node->inputs->data[1];
        const auto& weights_tensor = context->tensors[weights_id];
        if ((weights_tensor.type == kTfLiteInt8 ||
             weights_tensor.type == kTfLiteUInt8) &&
            weights_tensor.quantization.type == kTfLiteAffineQuantization) {
          return true;
        }
      }
      return false;
    }
    case kTfLiteBuiltinTransposeConv: {
      // Transpose convolution has a different order of inputs:
      // 0: output_shape, 1: filter, 2: input, 3: bias.
      const int input_id = 2;
      const TfLiteType input_type = context->tensors[input_id].type;
      if (input_type == kTfLiteInt8) {
        return true;
      }
      return false;
    }
    case kTfLiteBuiltinSelect: {
      const auto value_type = context->tensors[node->inputs->data[1]].type;
      return value_type == kTfLiteInt8;
    }
    case kTfLiteBuiltinAdd:
    case kTfLiteBuiltinArgMax:
    case kTfLiteBuiltinArgMin:
    case kTfLiteBuiltinAveragePool2d:
    case kTfLiteBuiltinBatchToSpaceNd:
    case kTfLiteBuiltinConcatenation:
    case kTfLiteBuiltinEqual:
    case kTfLiteBuiltinExpandDims:
    case kTfLiteBuiltinGather:
    case kTfLiteBuiltinGreater:
    case kTfLiteBuiltinGreaterEqual:
    case kTfLiteBuiltinHardSwish:
    case kTfLiteBuiltinL2Normalization:
    case kTfLiteBuiltinLeakyRelu:
    case kTfLiteBuiltinLess:
    case kTfLiteBuiltinLessEqual:
    case kTfLiteBuiltinLogistic:
    case kTfLiteBuiltinMaximum:
    case kTfLiteBuiltinMaxPool2d:
    case kTfLiteBuiltinMean:
    case kTfLiteBuiltinMinimum:
    case kTfLiteBuiltinMul:
    case kTfLiteBuiltinNotEqual:
    case kTfLiteBuiltinPad:
    case kTfLiteBuiltinPadv2:
    case kTfLiteBuiltinPrelu:
    case kTfLiteBuiltinReduceMax:
    case kTfLiteBuiltinReduceMin:
    case kTfLiteBuiltinRelu:
    case kTfLiteBuiltinReluN1To1:
    case kTfLiteBuiltinRelu6:
    case kTfLiteBuiltinResizeBilinear:
    case kTfLiteBuiltinResizeNearestNeighbor:
    case kTfLiteBuiltinReshape:
    case kTfLiteBuiltinSlice:
    case kTfLiteBuiltinSoftmax:
    case kTfLiteBuiltinSpaceToBatchNd:
    case kTfLiteBuiltinSpaceToDepth:
    case kTfLiteBuiltinDepthToSpace:
    case kTfLiteBuiltinStridedSlice:
    case kTfLiteBuiltinSub:
    case kTfLiteBuiltinTanh:
    case kTfLiteBuiltinTile:
    case kTfLiteBuiltinTopkV2:
    case kTfLiteBuiltinTranspose: {
      return input_type == kTfLiteInt8;
    }
    default:
      return false;
  }
}
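
// Illustrative example: for a kTfLiteBuiltinConv2d node whose input tensor is
// kTfLiteInt8 and whose weights are affine-quantized int8, the function above
// returns true, and the delegate registers those tensors with NNAPI as
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with the zero point shifted by 128
// (see ConvertTensorTypeToNNType and NN_TENSOR_FLAG_INT8_CONVERSION below).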

constexpr int kLstmFullKernelInputSize = 24;
// The 20-input version is deprecated and kept only to
// support old models. The latest version of the LSTM full kernel
// has 24 inputs.
constexpr int kLstmFullKernelNoOptionalParamsInputSize = 20;
constexpr int kLstmBasicKernelInputSize = 5;

inline bool isLstmBasicKernel(const TfLiteNode* node) {
  return node->inputs->size == kLstmBasicKernelInputSize;
}

inline bool isLstmFullKernel(const TfLiteNode* node) {
  return node->inputs->size == kLstmFullKernelInputSize ||
         node->inputs->size == kLstmFullKernelNoOptionalParamsInputSize;
}

bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
                      const TfLiteNode* node) {
  switch (builtin_code) {
    case kTfLiteBuiltinConv2d:
    case kTfLiteBuiltinFullyConnected: {
      const int input_id = node->inputs->data[0];
      const int filter_id = node->inputs->data[1];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType filter_type = context->tensors[filter_id].type;
      return IsFloat(input_type) && IsQuantized(filter_type);
    }
    case kTfLiteBuiltinLstm: {
      const int input_id = node->inputs->data[0];
      // Input #1 is optional so use #2 to determine if hybrid.
      const int weights_id = node->inputs->data[2];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return isLstmFullKernel(node) && IsFloat(input_type) &&
             IsQuantized(weights_type);
    }
    case kTfLiteBuiltinUnidirectionalSequenceLstm: {
      const int input_id = node->inputs->data[0];
      // Input #1 is optional so use #2 to determine if hybrid.
      const int weights_id = node->inputs->data[2];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return IsFloat(input_type) && IsQuantized(weights_type);
    }
    case kTfLiteBuiltinBidirectionalSequenceLstm: {
      const int input_id = node->inputs->data[0];
      // Input #1 is optional so use #2 to determine if hybrid.
      const int weights_id = node->inputs->data[2];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return IsFloat(input_type) && IsQuantized(weights_type);
    }
    case kTfLiteBuiltinUnidirectionalSequenceRnn: {
      const int input_id = node->inputs->data[0];
      const int weights_id = node->inputs->data[1];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return IsFloat(input_type) && IsQuantized(weights_type);
    }
    default:
      return false;
  }
}

bool HasUnspecifiedDimension(const TfLiteTensor* tensor) {
  if (tensor->dims_signature) {
    for (int i : TfLiteIntArrayView(tensor->dims_signature)) {
      if (i == -1) return true;
    }
  }
  return false;
}

ANeuralNetworksOperandType ConvertTensorTypeToNNType(
    const TfLiteTensor* tensor, TfLiteType ann_type_equivalent) {
  int32_t nn_type = 0;
  float scale = 0.0f;
  int32_t zero_point = 0;
  switch (tensor->type) {
    case kTfLiteFloat32:
      nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
      break;
    case kTfLiteUInt8:
      nn_type = ann_type_equivalent == kTfLiteInt32
                    ? ANEURALNETWORKS_TENSOR_INT32
                    : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      if (scale == 0) {
        // Quantized tensors with zero scale are not valid in NNAPI.
        scale = 1;
      }
      break;
    case kTfLiteInt8:
      nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      if (ann_type_equivalent == kTfLiteUInt8) {
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
        zero_point += 128;
      } else if (ann_type_equivalent == kTfLiteInt32) {
        nn_type = ANEURALNETWORKS_TENSOR_INT32;
        zero_point += 128;
      }
      if (scale == 0) {
        // Quantized tensors with zero scale are not valid in NNAPI.
        scale = 1;
      }
      break;
    case kTfLiteInt32:
      nn_type = ANEURALNETWORKS_TENSOR_INT32;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      break;
    case kTfLiteBool:
      nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
      break;
    case kTfLiteInt16:
      nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      break;
    default:
      break;
  }
  uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
  uint32_t* tensor_dims = reinterpret_cast<uint32_t*>(tensor->dims->data);
  static uint32_t scalar_rank = 1;
  // Treat a scalar input as a single-cell tensor in NNAPI.
  if (tensor_rank == 0) {
    tensor_rank = scalar_rank;
    tensor_dims = &scalar_rank;
  }
  ANeuralNetworksOperandType nn_operand_type{
      .type = nn_type,
      .dimensionCount = tensor_rank,
      .dimensions = tensor_dims,
      .scale = scale,
      .zeroPoint = zero_point,
  };
  return nn_operand_type;
}
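
// For example (illustrative): a kTfLiteInt8 tensor with scale 0.5 and
// zero_point -3, converted with ann_type_equivalent == kTfLiteUInt8, is
// described to NNAPI as an ANEURALNETWORKS_TENSOR_QUANT8_ASYMM operand with
// scale 0.5 and zero_point 125 (-3 + 128).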

constexpr size_t kDefaultByteAlignmentForNNAPI = 16;

static size_t getNumPaddingBytes(size_t byte_size) {
  size_t num_padding_bytes = 0;
  if (byte_size % kDefaultByteAlignmentForNNAPI) {
    num_padding_bytes = kDefaultByteAlignmentForNNAPI -
                        (byte_size % kDefaultByteAlignmentForNNAPI);
  }
  return num_padding_bytes;
}
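
// For example, a 50-byte operand gets 14 padding bytes so that whatever
// follows it starts at a 16-byte-aligned offset (50 % 16 == 2 and
// 16 - 2 == 14); sizes that are already multiples of 16 get no padding.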

// Returns the NNAPI device handle for the provided null-terminated device
// name. Returns kTfLiteError in case of any NNAPI error or if no device with
// the given name can be found.
TfLiteStatus GetDeviceHandle(const NnApi* nnapi, TfLiteContext* context,
                             const char* device_name_ptr,
                             ANeuralNetworksDevice** result, int* nnapi_errno) {
  if (!device_name_ptr) return kTfLiteError;
  *result = nullptr;
  std::string device_name(device_name_ptr);
  uint32_t num_devices = 0;
  nnapi->ANeuralNetworks_getDeviceCount(&num_devices);

  for (uint32_t i = 0; i < num_devices; i++) {
    ANeuralNetworksDevice* device = nullptr;
    const char* buffer = nullptr;
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context, nnapi->ANeuralNetworks_getDevice(i, &device),
        "Searching for target device", nnapi_errno);

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
        "Searching for target device", nnapi_errno);

    if (device_name == buffer) {
      *result = device;
      return kTfLiteOk;
    }
  }

  context->ReportError(context,
                       "Could not find the specified NNAPI accelerator: %s. "
                       "Must be one of: {%s}.",
                       device_name_ptr,
                       nnapi::GetStringDeviceNamesList().c_str());
  return kTfLiteError;
}

// Compute the hash of a TfLiteIntArray.
uint64_t GetHash(const TfLiteIntArray* int_array, uint64_t combine_with = 0) {
  constexpr auto kHashConst = 0x9e3779b97f4a7800ULL;
  uint64_t result = combine_with;
  for (auto i : TfLiteIntArrayView(int_array)) {
    result = result ^ (i + kHashConst + (result << 10) + (result >> 4));
  }
  return result;
}
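
// The mixing step above is in the spirit of boost::hash_combine, using a
// golden-ratio-derived constant. A typical (illustrative) use is chaining the
// hashes of several arrays, e.g.:
//   uint64_t signature = GetHash(node->outputs, GetHash(node->inputs));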

bool HasZeroes(TfLiteIntArrayView array) {
  for (auto value : array) {
    if (value == 0) {
      return true;
    }
  }
  return false;
}

// Bit mask for tensor flags.
enum {
  NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
  NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
  NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
  NN_TENSOR_FLAG_FORCE_PER_CHANNEL = 1U << 3,
};
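
// The flags are combined with bitwise OR, e.g. (illustrative):
//   int tensor_flags =
//       NN_TENSOR_FLAG_SCALAR_AS_TENSOR | NN_TENSOR_FLAG_INT8_CONVERSION;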

// Returns the SDK level to target when delegating to the given devices.
// The SDK level is the max of the ones supported by the devices or
// the current Android SDK level if no device is present.
TfLiteStatus GetTargetSdkVersion(
    TfLiteContext* context, const NnApi* nnapi,
    const std::vector<ANeuralNetworksDevice*>& device_handles,
    int* target_sdk_version, int* nnapi_errno) {
  *target_sdk_version = nnapi->android_sdk_version;
  int64_t devices_sdk_version = -1;
  for (const auto* device_handle : device_handles) {
    int64_t curr_device_sdk_version;
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context,
        nnapi->ANeuralNetworksDevice_getFeatureLevel(device_handle,
                                                     &curr_device_sdk_version),
        "Searching for target device", nnapi_errno);

    devices_sdk_version =
        std::max(curr_device_sdk_version, devices_sdk_version);
  }

  if ((devices_sdk_version > 0) &&
      // This second check is necessary since if the nnapi-reference device is
      // in the list of target devices the devices_sdk_version value will be
      // 1000.
      (devices_sdk_version < nnapi->android_sdk_version)) {
    TFLITE_LOG(TFLITE_LOG_INFO,
               "Changing Android NN SDK version %d to version "
               "supported by target devices: %lld",
               nnapi->android_sdk_version, devices_sdk_version);

    *target_sdk_version = devices_sdk_version;
  }

  return kTfLiteOk;
}

// Returns true if this delegate is configured to use a specific set of
// devices. This happens if either:
// - the accelerator_name option has been specified, or
// - the NNAPI CPU implementation has been explicitly disabled.
// If exclude_nnapi_reference is true, this method returns false when the
// accelerator_name in the delegate options is equal to "nnapi-reference".
bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
                            const NnApi* nnapi,
                            bool exclude_nnapi_reference = false) {
  const char* device_name_ptr = delegate_options.accelerator_name;
  std::string nnapi_cpu("nnapi-reference");
  bool has_selected_accelerator = device_name_ptr != nullptr;
  if (exclude_nnapi_reference && has_selected_accelerator) {
    if (nnapi_cpu == device_name_ptr) return false;
  }
  return (delegate_options.disallow_nnapi_cpu &&
          nnapi->android_sdk_version >=
              delegate::nnapi::kMinSdkVersionForNNAPI12) ||
         has_selected_accelerator;
}

// Fills the given result vector with the list of devices the given delegate
// is referring to.
// There are three possible results:
// - An empty array (not the full list of available accelerators,
//   for efficiency reasons) if no accelerator is chosen and the
//   disallow_nnapi_cpu delegate option is false.
// - A single-element array with the target processor, if an accelerator name
//   is specified in the delegate options.
// - The full list of devices available on the target, excluding the NNAPI
//   reference implementation, if the delegate option disallow_nnapi_cpu has
//   been specified.
TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate,
                              const NnApi* nnapi, int* nnapi_errno,
                              std::vector<ANeuralNetworksDevice*>* result) {
  if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) {
    return kTfLiteError;
  }

  const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
  const char* device_name_ptr = delegate_options.accelerator_name;

  if (device_name_ptr != nullptr) {
    // User specified an accelerator to use.
    ANeuralNetworksDevice* nnapi_device = nullptr;
    TF_LITE_ENSURE_STATUS(GetDeviceHandle(nnapi, context, device_name_ptr,
                                          &nnapi_device, nnapi_errno));
    result->push_back(nnapi_device);
  } else if (delegate_options.disallow_nnapi_cpu) {
    std::string nnapi_cpu("nnapi-reference");
    uint32_t num_devices = 0;
    nnapi->ANeuralNetworks_getDeviceCount(&num_devices);

    for (uint32_t i = 0; i < num_devices; i++) {
      ANeuralNetworksDevice* device = nullptr;
      const char* buffer = nullptr;
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi->ANeuralNetworks_getDevice(i, &device),
          "Getting list of available devices", nnapi_errno);
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
          "Getting list of available devices", nnapi_errno);
      if (nnapi_cpu != buffer) {
        result->push_back(device);
      }
    }
  }

  return kTfLiteOk;
}

}  // namespace

namespace delegate {
namespace nnapi {

#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
NNMemory::NNMemory(const NnApi* nnapi, const char* name, size_t size) {
  if (name && size > 0) {
    nnapi_ = nnapi;
    byte_size_ = size;
    fd_ = nnapi_->ASharedMemory_create(name, size);
    data_ptr_ = reinterpret_cast<uint8_t*>(
        mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
    nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
                                               fd_, 0, &nn_memory_handle_);
  }
}
#else
NNMemory::NNMemory(const NnApi* /*nnapi*/, const char* /*name*/,
                   size_t /*size*/)
    : nnapi_(nullptr) {}
#endif

NNMemory::~NNMemory() {
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
  if (data_ptr_) {
    munmap(data_ptr_, byte_size_);
  }
  if (nn_memory_handle_) {
    nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
  }
  if (fd_ > 0) close(fd_);
#endif
}

class DequantizeMapping {
 public:
  int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
    for (const auto& element : mapping_) {
      if (ann_index == std::get<0>(element) && type == std::get<1>(element)) {
        return std::get<2>(element);
      }
    }
    return -1;
  }

  void Add(int ann_index, TfLiteType type, int dequantized_ann_index) {
    // This assumes it is not already mapped.
    mapping_.emplace_back(ann_index, type, dequantized_ann_index);
  }

 private:
  // Each tuple specifies the ANN (quantized) tensor index, the desired
  // floating-point type and the matching ANN (dequantized) tensor index. This
  // could use a map but instead std::vector is used to keep code size lower.
  std::vector<std::tuple<int, TfLiteType, int>> mapping_;
};

// Abstract builder for building an op in the NN API graph. This handles
// the disparity between TFLite and NN API operand types. NN API has singular
// operands for both tensors and parameters, and TFLite separates the two.
class NNAPIOpBuilder {
 public:
  NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
                 OperandMapping* tensor_mapping,
                 DequantizeMapping* dequantize_mapping,
                 std::map<const MMAPAllocation*, ANeuralNetworksMemory*>*
                     allocation_mapping,
                 std::vector<int>* nnapi_to_tflite_op_mapping,
                 ANeuralNetworksModel* nn_model, int* nnapi_errno,
                 bool allow_dynamic_dimensions)
      : nnapi_(nnapi),
        context_(context),
        operand_mapping_(tensor_mapping),
        dequantize_mapping_(dequantize_mapping),
        allocation_memory_mapping_(allocation_mapping),
        nnapi_to_tflite_op_mapping_(nnapi_to_tflite_op_mapping),
        nn_model_(nn_model),
        nnapi_errno_(nnapi_errno),
        allow_dynamic_dimensions_(allow_dynamic_dimensions) {}

  TfLiteStatus AddScalarBoolOperand(bool value) {
    return AddScalarOperand<bool>(value, ANEURALNETWORKS_BOOL);
  }

  TfLiteStatus AddScalarInt32Operand(int32_t value) {
    return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
  }

  TfLiteStatus AddScalarFloat32Operand(float value) {
    return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
  }

  TfLiteStatus AddVectorInt32Operand(const int32_t* values,
                                     uint32_t num_values) {
    return AddVectorOperand<int32_t>(values, num_values,
                                     ANEURALNETWORKS_TENSOR_INT32,
                                     /*scale=*/0.f, /*zero_point=*/0);
  }

  TfLiteStatus AddVectorInt32Operand(const int32_t* values, uint32_t num_values,
                                     float scale, int32_t zero_point) {
    return AddVectorOperand<int32_t>(
        values, num_values, ANEURALNETWORKS_TENSOR_INT32, scale, zero_point);
  }

  TfLiteStatus AddVectorInt16Operand(const int16_t* values,
                                     uint32_t num_values) {
    return AddVectorOperand<int16_t>(values, num_values,
                                     ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
                                     /*scale=*/1.f, /*zero_point=*/0);
  }

  TfLiteStatus AddVectorInt8Operand(const int8_t* values, uint32_t num_values) {
    return AddVectorOperand<int8_t>(values, num_values,
                                    ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
                                    /*scale=*/1.f, /*zero_point=*/0);
  }

  TfLiteStatus AddVectorFloat32Operand(const float* values,
                                       uint32_t num_values) {
    return AddVectorOperand<float>(values, num_values,
                                   ANEURALNETWORKS_TENSOR_FLOAT32);
  }

  TfLiteStatus AddPoolingParams(void* data) {
    auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
    AddScalarInt32Operand(builtin->padding);
    AddScalarInt32Operand(builtin->stride_width);
    AddScalarInt32Operand(builtin->stride_height);
    AddScalarInt32Operand(builtin->filter_width);
    AddScalarInt32Operand(builtin->filter_height);
    AddScalarInt32Operand(builtin->activation);
    return kTfLiteOk;
  }

  TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op,
                              int tensor_flags = 0) {
    return AddTensor(tensor_index, hybrid_op, &augmented_inputs_, tensor_flags);
  }

  TfLiteStatus AddTensorOutput(int tensor_index, int tensor_flags = 0) {
    return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_,
                     tensor_flags);
  }

  TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) {
    std::vector<uint32_t> dims(dimension_count, 0);
    return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr);
  }

  TfLiteStatus AddStateFloat32Tensor(int tensor_index,
                                     int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddFloat32OutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ann_tensor_index_out);
  }

  TfLiteStatus AddStateInt16Tensor(int tensor_index,
                                   int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddAdditionalOutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ANEURALNETWORKS_TENSOR_QUANT16_SYMM, tensor->params.scale,
        tensor->params.zero_point, ann_tensor_index_out);
  }

  TfLiteStatus AddStateInt8AsymTensor(int tensor_index,
                                      int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddAdditionalOutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, tensor->params.scale,
        tensor->params.zero_point, ann_tensor_index_out);
  }

  // Adds a constant tensor with a single element, intended for
  // broadcast-capable ops.
  TfLiteStatus AddSingleValueConstantTensor(float value, bool is_quantized) {
    if (!is_quantized) {
      return AddVectorFloat32Operand(&value, 1);
    } else {
      // When a quantized tensor is needed, set the quantized value to 64 and
      // zero_point to 0, and adjust the scale accordingly.
      const uint8_t quant8_value = 64;
      return AddVectorOperand<uint8_t>(&quant8_value, 1,
                                       ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
                                       value / quant8_value, 0);
    }
  }
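
  // Worked example (illustrative): for value == 1/3 the quantized tensor
  // holds the single byte 64 with zero_point 0 and scale (1/3)/64, so it
  // dequantizes back to exactly 64 * (1/3)/64 == 1/3.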

  // Calculates the scale and zero_point for an 8-bit unsigned tensor, given
  // float min and max. zero_point is clamped to [0, 255].
  TfLiteStatus CalculateQuantizationParams(float min, float max, float* scale,
                                           int* zero_point) {
    if (max < min) return kTfLiteError;
    *scale = (max - min) / 255.f;
    if (min > 0.f) {
      *zero_point = 0;
    } else if (max < 0.f) {
      *zero_point = 255;
    } else {
      *zero_point = (0.f - min) / (*scale);
    }
    return kTfLiteOk;
  }
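
  // For example, min == -1.f and max == 1.f yield scale == 2/255 and
  // zero_point == 127 (127.5 truncated by the conversion to int); ranges that
  // do not straddle zero are pinned to zero_point 0 or 255.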

  // Lower hardswish according to the following equation:
  // hard_swish[x] = x * ReLU6(x + 3) / 6
  //              == x * (Relu_N1_to_1(x/3) * 3 + 3) / 6
  //              == 0.5x * Relu_N1_to_1(x/3) + 0.5x
  TfLiteStatus TransformHardSwishIntoSupportedOps(int lite_input_index,
                                                  int lite_output_index,
                                                  bool need_int8_conversion,
                                                  int lite_node_index) {
    const TfLiteTensor& tensor = context_->tensors[lite_input_index];
    float input_scale = tensor.params.scale;
    int input_zero_point = tensor.params.zero_point;
    float input_min = 0.f;
    float input_max = 0.f;
    int tensor_flags = 0;
    if (need_int8_conversion) {
      tensor_flags = tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION;
      input_zero_point += 128;
    }
    bool is_quantized = false;
    int nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
    if (tensor.type == kTfLiteInt8 || tensor.type == kTfLiteUInt8) {
      is_quantized = true;
      nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
      input_min = (0 - input_zero_point) * input_scale;
      input_max = (255 - input_zero_point) * input_scale;
    }

    // Stage 1: s1 = Relu1(x * 1/3)
    float s1_output_min = 0.f;
    float s1_output_max = 0.f;
    int s1_out_ann_index = 0;
    {
      float s1_output_scale = 0.f;
      int s1_output_zero_point = 0;
      if (is_quantized) {
        // clamp the output range to [-1, 1] if needed.
        s1_output_min = input_min / 3.f < -1.f ? -1.f : input_min / 3.f;
        s1_output_max = input_max / 3.f > 1.f ? 1.f : input_max / 3.f;
        CalculateQuantizationParams(s1_output_min, s1_output_max,
                                    &s1_output_scale, &s1_output_zero_point);
      }
      TF_LITE_ENSURE_OK(context_,
                        AddTensorInput(lite_input_index, false, tensor_flags));
      const float value3f = 1.f / 3.f;
      TF_LITE_ENSURE_OK(context_,
                        AddSingleValueConstantTensor(value3f, is_quantized));
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_RELU1));
      TF_LITE_ENSURE_OK(
          context_,
          AddAdditionalOutputTensor(
              tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
              nn_type, s1_output_scale, s1_output_zero_point,
              &s1_out_ann_index));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
    }

    // Stage 2: s2 = x / 2
    float s2_output_min = input_min / 2.f;
    float s2_output_max = input_max / 2.f;
    int s2_out_ann_index = 0;
    {
      float s2_output_scale = input_scale / 2.0f;
      int s2_output_zero_point = input_zero_point;
      TF_LITE_ENSURE_OK(context_,
                        AddTensorInput(lite_input_index, false, tensor_flags));
      const float value2f = 0.5f;
      TF_LITE_ENSURE_OK(context_,
                        AddSingleValueConstantTensor(value2f, is_quantized));
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
      TF_LITE_ENSURE_OK(
          context_,
          AddAdditionalOutputTensor(
              tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
              nn_type, s2_output_scale, s2_output_zero_point,
              &s2_out_ann_index));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
    }

    // Stage 3: s3 = s1 * s2
    int s3_out_ann_index = 0;
    {
      augmented_inputs_.push_back(s1_out_ann_index);
      augmented_inputs_.push_back(s2_out_ann_index);
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
      float s3_output_scale = 0.f;
      int s3_output_zero_point = 0;
      if (is_quantized) {
        // The min for stage 3 is always 0.0f.
        float s3_output_min = 0.f;
        // The max for stage 3 is max(s1_min * s2_min, s1_max * s2_max).
        float s3_output_max =
            s1_output_max * s2_output_max > s1_output_min * s2_output_min
                ? s1_output_max * s2_output_max
                : s1_output_min * s2_output_min;
        CalculateQuantizationParams(s3_output_min, s3_output_max,
                                    &s3_output_scale, &s3_output_zero_point);
      }
      TF_LITE_ENSURE_OK(
          context_,
          AddAdditionalOutputTensor(
              tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
              nn_type, s3_output_scale, s3_output_zero_point,
              &s3_out_ann_index));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
    }

    // Stage 4: y = s3 + s2
    {
      augmented_inputs_.push_back(s2_out_ann_index);
      augmented_inputs_.push_back(s3_out_ann_index);
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
      TF_LITE_ENSURE_OK(context_,
                        AddTensorOutput(lite_output_index, tensor_flags));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index));
    }

    return kTfLiteOk;
  }

  // Adds the operation to the model and maps the operation to the originating
  // TFLite one.
  TfLiteStatus AddOperationToModel(ANeuralNetworksOperationType type,
                                   uint32_t input_count, const uint32_t* inputs,
                                   uint32_t output_count,
                                   const uint32_t* outputs,
                                   int lite_node_index) {
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperation(
            nn_model_, type, input_count, inputs, output_count, outputs),
        "adding operation", nnapi_errno_);
    nnapi_to_tflite_op_mapping_->push_back(lite_node_index);
    return kTfLiteOk;
  }

  // Adds a Dequantize operator and replaces the input tensor index with the
  // dequantized version. If the dequantized version of the tensor already
  // exists, it is not added again.
  TfLiteStatus AddDequantize(int nn_input_index, int lite_tensor_index,
                             TfLiteType dequantized_type, int lite_node_index) {
    const int ann_index =
        operand_mapping_->lite_index_to_ann(lite_tensor_index);
    int dequantized_ann_index =
        dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type);

    if (dequantized_ann_index == -1) {
      // The dequantized version does not exist yet, it has to be added: a new
      // Dequantize operation is added, yielding a new tensor.
      const TfLiteTensor& tensor = context_->tensors[lite_tensor_index];
      ANeuralNetworksOperandType operand_type{
          ANEURALNETWORKS_TENSOR_FLOAT32,
          static_cast<uint32_t>(tensor.dims->size),
          reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0};
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context_,
          nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
          "adding operand", nnapi_errno_);
      dequantized_ann_index = operand_mapping_->add_new_non_tensor_operand();

      // Add Dequantize operation.
      const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)};
      const uint32_t dequantize_output[1] = {
          static_cast<uint32_t>(dequantized_ann_index)};
      TF_LITE_ENSURE_OK(
          context_, AddOperationToModel(ANEURALNETWORKS_DEQUANTIZE,
                                        /*input_count=*/1, dequantize_input,
                                        /*output_count=*/1, dequantize_output,
                                        lite_node_index));
      dequantize_mapping_->Add(ann_index, dequantized_type,
                               dequantized_ann_index);
    }

    // The input for the original operation is modified so that the operation
    // now uses the dequantized tensor as input.
    augmented_inputs_[nn_input_index] = dequantized_ann_index;

    return kTfLiteOk;
  }

  // Finish emitting the op (of type `type`) into the NN API.
  TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type,
                                    int lite_node_index) {
    // Actually add a NN API operation
    TF_LITE_ENSURE_OK(context_,
                      AddOperationToModel(
                          type, static_cast<uint32_t>(augmented_inputs_.size()),
                          augmented_inputs_.data(),
                          static_cast<uint32_t>(augmented_outputs_.size()),
                          augmented_outputs_.data(), lite_node_index));
    augmented_inputs_.clear();
    augmented_outputs_.clear();
    return kTfLiteOk;
  }
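
  // A typical (illustrative) build sequence for a binary op such as ADD,
  // where the indices are placeholders, is:
  //   builder.AddTensorInput(lhs_index, /*hybrid_op=*/false);
  //   builder.AddTensorInput(rhs_index, /*hybrid_op=*/false);
  //   builder.AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE);
  //   builder.AddTensorOutput(output_index);
  //   builder.FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index);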

  TfLiteStatus AddSingleValueTensorAsScalarOperand(int tensor_index,
                                                   int nn_type) {
    const TfLiteTensor* tensor = &context_->tensors[tensor_index];
    TF_LITE_ENSURE_EQ(context_, NumElements(tensor), 1);

    ANeuralNetworksOperandType operand_type{.type = nn_type};
    RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", tensor, nnapi_errno_);
    int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
    if (ann_tensor_index != -1) {
      augmented_inputs_.push_back(ann_tensor_index);
      return kTfLiteOk;
    }
    // Allocate a new tensor index
    ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
    augmented_inputs_.push_back(ann_tensor_index);

    const TfLiteType tensor_type = tensor->type;
    TfLiteType nn_type_equivalent;
    TF_LITE_ENSURE_OK(context_, GetEquivalentToANNType(context_, nn_type,
                                                       &nn_type_equivalent));
    if (tensor_type != nn_type_equivalent) {
      operand_mapping_->add_type_conversion(tensor_index, nn_type_equivalent);
    }
    return kTfLiteOk;
  }

  template <typename T>
  TfLiteStatus AddNewInputConstantTensor(
      int32_t nn_type, TfLiteType type, const TfLiteIntArray* dims,
      const std::vector<T>& tensor_value,
      const TfLiteQuantizationParams& quant_params, int* tensor_index) {
    TF_LITE_ENSURE_OK(context_,
                      context_->AddTensors(context_, 1, tensor_index));

    TfLiteTensor* new_tensor = &context_->tensors[*tensor_index];
    new_tensor->type = type;
    new_tensor->allocation_type = kTfLiteDynamic;
    new_tensor->params = quant_params;

    // Not removing the new tensor in case of resizing errors, since it will
    // be cleared by the context.
    TF_LITE_ENSURE_OK(
        context_,
        context_->ResizeTensor(
            context_, new_tensor,
            // ResizeTensor takes ownership of the dims array passed as a param.
            TfLiteIntArrayCopy(dims)));

    memcpy(new_tensor->data.raw,
           reinterpret_cast<const char*>(tensor_value.data()),
           tensor_value.size() * sizeof(T));

    const uint32_t tensor_rank = static_cast<uint32_t>(dims->size);
    const uint32_t* tensor_dims = reinterpret_cast<const uint32_t*>(dims->data);
    ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
                                            quant_params.scale,
                                            quant_params.zero_point};

    const int ann_tensor_index =
        operand_mapping_->add_delegate_generated_input_ann_tensors_operand();

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", nnapi_errno_);

    augmented_inputs_.push_back(ann_tensor_index);

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_setOperandValue(
            nn_model_, ann_tensor_index, new_tensor->data.raw,
            new_tensor->bytes),
        "setting new operand value", nnapi_errno_);

    return kTfLiteOk;
  }

  template <typename T>
  TfLiteStatus AddNewInputConstantTensor(
      int32_t nn_type, TfLiteType type, std::initializer_list<int> dims,
      const std::vector<T>& tensor_value,
      const TfLiteQuantizationParams& quant_params, int* tensor_index) {
    TfLiteIntArray* dim_array = TfLiteIntArrayCreate(dims.size());
    dim_array->size = dims.size();
    std::copy(dims.begin(), dims.end(), dim_array->data);

    const auto result = AddNewInputConstantTensor(
        nn_type, type, dim_array, tensor_value, quant_params, tensor_index);
    TfLiteIntArrayFree(dim_array);
    return result;
  }
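
  // Illustrative use of the overloads above (the indices and values are
  // placeholders): adding a zero-filled int32 bias vector of length 4 as a
  // constant input:
  //   int new_tensor_index = -1;
  //   builder.AddNewInputConstantTensor<int32_t>(
  //       ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {4},
  //       std::vector<int32_t>(4, 0), TfLiteQuantizationParams{},
  //       &new_tensor_index);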

 private:
  // Returns a TF Lite type which has the same memory representation as a
  // provided NN API type.
  TfLiteStatus GetEquivalentToANNType(TfLiteContext* context, int nn_type,
                                      TfLiteType* type) {
    switch (nn_type) {
      case ANEURALNETWORKS_INT32:
        *type = kTfLiteInt32;
        return kTfLiteOk;
      case ANEURALNETWORKS_FLOAT32:
        *type = kTfLiteFloat32;
        return kTfLiteOk;
      default:
        context->ReportError(context,
                             "NN API Delegate: Can't get an equivalent TF Lite "
                             "type for provided NN API type: %d.\n",
                             nn_type);
        return kTfLiteError;
    }
  }

  template <typename T>
  TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
    ANeuralNetworksOperandType operand_type{.type = nn_type};
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", nnapi_errno_);
    const int ann_index = operand_mapping_->add_new_non_tensor_operand();
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_setOperandValue(nn_model_, ann_index,
                                                     &value, sizeof(T)),
        "setting new operand value", nnapi_errno_);
    augmented_inputs_.push_back(ann_index);
    return kTfLiteOk;
  }

  template <typename T>
  TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
                                int32_t nn_type, float scale,
                                int32_t zero_point) {
    ANeuralNetworksOperandType operand_type{.type = nn_type,
                                            .dimensionCount = 1,
                                            .dimensions = &num_values,
                                            .scale = scale,
                                            .zeroPoint = zero_point};

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", nnapi_errno_);

    const int ann_index = operand_mapping_->add_new_non_tensor_operand();
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_setOperandValue(
            nn_model_, ann_index, values, sizeof(T) * num_values),
        "setting new operand value", nnapi_errno_);
    augmented_inputs_.push_back(ann_index);
    return kTfLiteOk;
  }
1215 
1216   template <typename T>
AddVectorOperand(const T * values,uint32_t num_values,int32_t nn_type)1217   TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1218                                 int32_t nn_type) {
1219     return AddVectorOperand(values, num_values, nn_type, /*scale=*/0.f,
1220                             /*zero_point=*/0);
1221   }
1222 
AddFloat32OutputTensor(uint32_t dimension_count,const uint32_t * dimension_data,int * ann_index_out)1223   TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count,
1224                                       const uint32_t* dimension_data,
1225                                       int* ann_index_out) {
1226     return AddAdditionalOutputTensor(
1227         dimension_count, dimension_data, ANEURALNETWORKS_TENSOR_FLOAT32,
1228         /*scale=*/0.f, /*zero_point=*/0, ann_index_out);
1229   }
1230 
AddAdditionalOutputTensor(uint32_t dimension_count,const uint32_t * dimension_data,int32_t nn_type,float scale,int32_t zero_point,int * ann_index_out)1231   TfLiteStatus AddAdditionalOutputTensor(uint32_t dimension_count,
1232                                          const uint32_t* dimension_data,
1233                                          int32_t nn_type, float scale,
1234                                          int32_t zero_point,
1235                                          int* ann_index_out) {
1236     ANeuralNetworksOperandType operand_type{
1237         .type = nn_type,
1238         .dimensionCount = dimension_count,
1239         .dimensions = dimension_data,
1240         .scale = scale,
1241         .zeroPoint = zero_point,
1242     };
1243     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1244         context_,
1245         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1246         "adding operand", nnapi_errno_);
1247     const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1248     augmented_outputs_.push_back(ann_index);
1249     if (ann_index_out) *ann_index_out = ann_index;
1250     return kTfLiteOk;
1251   }
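  // Editorial note: unlike AddTensor below, these helpers create operands that
  // are not backed by any TfLiteTensor; they only reserve an NN API operand
  // index (recorded in augmented_outputs_) for extra outputs that an NNAPI op
  // produces but TFLite does not consume. A hypothetical call:
  //   int state_ann_index = -1;
  //   TF_LITE_ENSURE_STATUS(builder.AddFloat32OutputTensor(
  //       /*dimension_count=*/2, state_dims, &state_ann_index));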
1252 
1253   // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`.
1254   // This returns the NN API tensor index corresponding to the created tensor.
1255   // If another caller previously created a NN API tensor for `tensor_index`
1256   // then the existing one is returned.
1257   TfLiteStatus AddTensor(int tensor_index, bool hybrid_op,
1258                          std::vector<uint32_t>* indices, int tensor_flags = 0) {
1259     const bool scalar_as_tensor =
1260         tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
1261     const bool need_int8_conversion =
1262         tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
1263     const bool use_int8_asymm_signed =
1264         tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
1265     const bool force_per_channel =
1266         tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL;
1267     int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
1268     if (ann_tensor_index != -1) {
1269       indices->push_back(ann_tensor_index);
1270       return kTfLiteOk;
1271     }
1272     // Allocate a new tensor index
1273     ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
1274 
1275     // Parameters needed for new type.
1276     int32_t nn_type = 0;
1277     float scale = 0.0f;
1278     int32_t zeroPoint = 0;
1279     ANeuralNetworksSymmPerChannelQuantParams ann_perchannel_params;
1280     TfLiteTensor* tensor = &context_->tensors[tensor_index];
1281     TfLiteType tensor_type = tensor->type;
1282     if (hybrid_op && (tensor_type == kTfLiteUInt8)) {
1283       // For legacy reasons, UINT8 weights in hybrid operators are actually INT8
1284       // values and should be interpreted as such.
1285       tensor_type = kTfLiteInt8;
1286     }
1287     switch (tensor_type) {
1288       case kTfLiteNoType:
1289         // Tensors added during initialization of Ops don't have a type yet and
1290         // should not be registered with the NNAPI.
1291         indices->push_back(-1);
1292         return kTfLiteOk;
1293       case kTfLiteFloat32:
1294         nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1295         break;
1296       case kTfLiteUInt8:
1297         nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1298         scale = tensor->params.scale;
1299         zeroPoint = tensor->params.zero_point;
1300         if (scale == 0) {
1301           // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
1302           // NNAPI.
1303           scale = 1;
1304         }
1305         break;
1306       case kTfLiteInt8:
1307         // If explicit int8 conversion is needed, we still need
1308         // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
1309         if (use_int8_asymm_signed) {
1310           nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1311         } else if (need_int8_conversion) {
1312           nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1313         } else {
1314           nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
1315         }
1316         scale = tensor->params.scale;
1317         zeroPoint = tensor->params.zero_point;
1318         if (tensor->quantization.type == kTfLiteAffineQuantization) {
1319           TfLiteAffineQuantization* quantization_params =
1320               static_cast<TfLiteAffineQuantization*>(
1321                   tensor->quantization.params);
1322           if (quantization_params->scale->size > 1 || force_per_channel) {
1323             // Set up per-channel quantization.
1324             ann_perchannel_params = {
1325                 .channelDim = static_cast<uint32_t>(
1326                     quantization_params->quantized_dimension),
1327                 .scaleCount =
1328                     static_cast<uint32_t>(quantization_params->scale->size),
1329                 .scales = quantization_params->scale->data,
1330             };
1331             nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL;
1332             scale = 0.0f;
1333             zeroPoint = 0;
1334           } else if (quantization_params->scale->size == 1) {
1335             scale = quantization_params->scale->data[0];
1336             zeroPoint = quantization_params->zero_point->data[0];
1337           }
1338         }
1339         if (nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1340           if (need_int8_conversion) {
1341             zeroPoint += 128;
1342             operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
1343           }
1344           if (scale == 0) {
1345             // QUANT8 tensors with zero scale are not valid in NNAPI.
1346             scale = 1;
1347           }
1348         }
1349         break;
1350       case kTfLiteInt32:
1351         nn_type = ANEURALNETWORKS_TENSOR_INT32;
1352         scale = tensor->params.scale;
1353         zeroPoint = tensor->params.zero_point;
1354         break;
1355       case kTfLiteBool:
1356         nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
1357         break;
1358       case kTfLiteInt16:
1359         nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
1360         scale = tensor->params.scale;
1361         zeroPoint = tensor->params.zero_point;
1362         break;
1363       default:
1364         context_->ReportError(
1365             context_, "Failed to add NN API tensor: type %s is not supported.",
1366             TfLiteTypeGetName(tensor_type));
1367         return kTfLiteError;
1368     }
1369     bool has_unspecified_dimensions = HasUnspecifiedDimension(tensor);
1370     uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
1371     std::vector<uint32_t> dims_unspecified(tensor_rank, 0);
1372     if (has_unspecified_dimensions) {
1373       for (int i = 0; i < tensor->dims_signature->size; i++) {
1374         dims_unspecified[i] = tensor->dims_signature->data[i] == -1
1375                                   ? 0
1376                                   : tensor->dims_signature->data[i];
1377       }
1378     }
1379     uint32_t* tensor_dims =
1380         has_unspecified_dimensions && allow_dynamic_dimensions_
1381             ? dims_unspecified.data()
1382             : reinterpret_cast<uint32_t*>(tensor->dims->data);
1383     if (scalar_as_tensor && tensor_rank == 0) {
1384       // Use rank 1, shape {1} operand for TFLite scalar tensors.
1385       tensor_rank = 1;
1386       tensor_dims = &tensor_rank;
1387     }
1388     if (tensor_rank == 0) {
1389       // If tensor_rank is 0, the dimensions pointer must be nullptr.
1390       tensor_dims = nullptr;
1391     }
1392 
1393     ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1394                                             scale, zeroPoint};
1395     RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1396         context_,
1397         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1398         "adding operand", tensor, nnapi_errno_);
1399 
1400     if (nn_type == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1401       RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1402           context_,
1403           nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
1404               nn_model_, ann_tensor_index, &ann_perchannel_params),
1405           "setting new operand per channel quantization params", tensor,
1406           nnapi_errno_);
1407     }
1408     if (tensor->allocation_type == kTfLiteMmapRo) {
1409       if (IsQuantized(tensor_type) && need_int8_conversion &&
1410           nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1411         // We need to add a tensor and convert the weights into uint8.
1412         // Currently this is only needed for fully_connected. The new_tensor is
1413         // needed for lifetime management for the converted weights.
1414         int new_tensor_index = -1;
1415         TF_LITE_ENSURE_OK(context_,
1416                           context_->AddTensors(context_, 1, &new_tensor_index));
1417         TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1418         new_tensor->type = kTfLiteUInt8;
1419         new_tensor->allocation_type = kTfLiteDynamic;
1420         new_tensor->params.scale = scale;
1421         new_tensor->params.zero_point = zeroPoint;
1422         // Not removing the new tensor in case of resizing errors since it will
1423         // be cleared by the context
1424         TF_LITE_ENSURE_OK(
1425             context_, context_->ResizeTensor(context_, new_tensor,
1426                                              // Resize Tensor takes ownership of
1427                                              // the dims array passed as param
1428                                              TfLiteIntArrayCopy(tensor->dims)));
1429         // Convert each int8 value into the corresponding uint8 value.
1430         const auto num_elements = NumElements(tensor);
1431         for (int i = 0; i < num_elements; ++i) {
1432           new_tensor->data.uint8[i] = static_cast<const uint8_t>(
1433               static_cast<int32_t>(tensor->data.int8[i]) + 128);
1434         }
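        // Worked example of the shift above (editorial note): an int8 value q
        // with zero point z maps to the uint8 value q + 128 with zero point
        // z + 128, so the represented real value scale * (q - z) is unchanged
        // by the conversion.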
1435         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1436             context_,
1437             nnapi_->ANeuralNetworksModel_setOperandValue(
1438                 nn_model_, ann_tensor_index, new_tensor->data.raw,
1439                 new_tensor->bytes),
1440             "setting new operand value", tensor, nnapi_errno_);
1441 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
1442       } else if (tensor->allocation &&
1443                  static_cast<const Allocation*>(tensor->allocation)->type() ==
1444                      Allocation::Type::kMMap) {
1445         const MMAPAllocation* mmap_alloc =
1446             static_cast<const MMAPAllocation*>(tensor->allocation);
1447         if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
1448           ANeuralNetworksMemory* ann_memory_handle = nullptr;
1449           nnapi_->ANeuralNetworksMemory_createFromFd(
1450               mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0,
1451               &ann_memory_handle);
1452           allocation_memory_mapping_->insert(
1453               std::make_pair(mmap_alloc, ann_memory_handle));
1454         }
1455         ANeuralNetworksMemory* ann_memory_handle =
1456             allocation_memory_mapping_->at(mmap_alloc);
1457         // Compute the offset to the base pointer of the MMAPAllocation.
1458         auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) -
1459                       reinterpret_cast<const uint8_t*>(mmap_alloc->base());
1460         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1461             context_,
1462             nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
1463                 nn_model_, ann_tensor_index, ann_memory_handle, offset,
1464                 tensor->bytes),
1465             "setting new operand value from memory", tensor, nnapi_errno_);
1466 #endif
1467       } else {
1468         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1469             context_,
1470             nnapi_->ANeuralNetworksModel_setOperandValue(
1471                 nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes),
1472             "setting new operand value", tensor, nnapi_errno_);
1473       }
1474     }
1475     indices->push_back(ann_tensor_index);
1476     return kTfLiteOk;
1477   }
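  // Illustrative sketch (editorial note, not original code): an op mapping
  // routine typically funnels every TFLite input through AddTensor so the
  // corresponding NN API operand index lands in `indices`; a tensor that was
  // already added is simply looked up again:
  //   std::vector<uint32_t> input_operands;
  //   TF_LITE_ENSURE_STATUS(builder.AddTensor(
  //       node->inputs->data[0], /*hybrid_op=*/false, &input_operands));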
1478 
1479   // Access to NNAPI.
1480   const NnApi* const nnapi_;
1481 
1482   // TfLiteContext for error handling.
1483   TfLiteContext* const context_;
1484 
1485   // Tracks relationship between indices.
1486   OperandMapping* const operand_mapping_;
1487 
1488   // Keeps mapping of ANN quantized tensor and float data type to equivalent
1489   // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map
1490   // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert
1491   // tensor #4 to a FLOAT32 tensor.
1492   DequantizeMapping* const dequantize_mapping_;
1493 
1494   std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const
1495       allocation_memory_mapping_;
1496 
1497   // Tracks for every operation in the NNAPI model the source TfLite model
1498   // node index.
1499   std::vector<int>* const nnapi_to_tflite_op_mapping_;
1500 
1501   // The NNAPI model.
1502   ANeuralNetworksModel* const nn_model_;
1503 
1504   // Inputs and outputs for the current op. These are augmented in the sense
1505   // that NN API uses operands for all arguments, not just tensors, unlike
1506   // TensorFlow Lite.
1507   std::vector<uint32_t> augmented_inputs_;
1508   std::vector<uint32_t> augmented_outputs_;
1509 
1510   // Return status code of the latest NNAPI call.
1511   int* nnapi_errno_;
1512 
1513   // Whether to allow dynamic batch size without re-compilation.
1514   bool allow_dynamic_dimensions_;
1515 };
1516 
1517 namespace {
1518 struct OpValidationContext {
1519   bool is_valid;
1520   std::vector<NNAPIValidationFailure>* validation_failures;
1521 };
1522 
1523 #define EXPECT_INPUT_TYPE_IN(actual_type, ...)                    \
1524   ExpectTypeIn(actual_type, {__VA_ARGS__},                        \
1525                NNAPIValidationFailureType::kUnsupportedInputType, \
1526                "Input type not in expected list " #__VA_ARGS__, &val_ctx)
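// Illustrative expansion (editorial note): a call such as
//   EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8);
// expands to
//   ExpectTypeIn(input_type, {kTfLiteFloat32, kTfLiteUInt8},
//                NNAPIValidationFailureType::kUnsupportedInputType,
//                "Input type not in expected list kTfLiteFloat32, kTfLiteUInt8",
//                &val_ctx);
// so the macro can only be used where a local OpValidationContext named
// val_ctx is in scope.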
1527 
1528 inline void AddValidationFailure(NNAPIValidationFailureType failure_type,
1529                                  const char* message,
1530                                  OpValidationContext* val_ctx) {
1531   val_ctx->is_valid = false;
1532 
1533 #ifdef NNAPI_VERBOSE_VALIDATION
1534   if (val_ctx->validation_failures) {
1535     val_ctx->validation_failures->push_back({failure_type, message});
1536   }
1537 #endif
1538 }
1539 
1540 template <typename... Args>
1541 inline void AddValidationFailureFmt(OpValidationContext* val_ctx,
1542                                     NNAPIValidationFailureType failure_type,
1543                                     const char* message_fmt, Args... args) {
1544   val_ctx->is_valid = false;
1545 #ifdef NNAPI_VERBOSE_VALIDATION
1546   if (val_ctx->validation_failures) {
1547     size_t req_buf_size = snprintf(nullptr, 0, message_fmt, args...) + 1;
1548     std::unique_ptr<char[]> tmp_buf(new char[req_buf_size]);
1549     snprintf(tmp_buf.get(), req_buf_size, message_fmt, args...);
1550 
1551     val_ctx->validation_failures->push_back({failure_type, tmp_buf.get()});
1552   }
1553 #endif
1554 }
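// Illustrative use (editorial note): the Expect* helpers below forward to this
// as, for example,
//   AddValidationFailureFmt(val_ctx,
//                           NNAPIValidationFailureType::kUnsupportedAndroidVersion,
//                           "Android sdk version less than %d", min_version);
// The formatted message is only built when NNAPI_VERBOSE_VALIDATION is
// defined; otherwise the call just marks the op as invalid.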
1555 
1556 inline bool Expect(bool condition, NNAPIValidationFailureType failure_type,
1557                    const char* message, OpValidationContext* val_ctx) {
1558   if (!condition) {
1559     AddValidationFailure(failure_type, message, val_ctx);
1560     return false;
1561   }
1562   return true;
1563 }
1564 
1565 template <typename... Args>
1566 inline bool ExpectFmt(bool condition, OpValidationContext* val_ctx,
1567                       NNAPIValidationFailureType failure_type,
1568                       const char* message_fmt, Args... args) {
1569   if (!condition) {
1570     AddValidationFailureFmt(val_ctx, failure_type, message_fmt, args...);
1571     return false;
1572   }
1573   return true;
1574 }
1575 
1576 inline bool ExpectTypeIn(TfLiteType actual_type,
1577                          std::initializer_list<TfLiteType> allowed_types,
1578                          NNAPIValidationFailureType failure_type,
1579                          const char* msg, OpValidationContext* val_ctx) {
1580   return Expect(std::find(allowed_types.begin(), allowed_types.end(),
1581                           actual_type) != allowed_types.end(),
1582                 failure_type, msg, val_ctx);
1583 }
1584 
1585 inline bool ExpectMinAndroidSdkVersion(int curr_version, int min_version,
1586                                        OpValidationContext* val_ctx) {
1587   return ExpectFmt(curr_version >= min_version, val_ctx,
1588                    NNAPIValidationFailureType::kUnsupportedAndroidVersion,
1589                    "Android sdk version less than %d", min_version);
1590 }
1591 
1592 inline bool ExpectMaxOpVersion(int curr_version, int max_version,
1593                                OpValidationContext* val_ctx) {
1594   return ExpectFmt(curr_version <= max_version, val_ctx,
1595                    NNAPIValidationFailureType::kUnsupportedOperatorVersion,
1596                    "OP Version higher than %d", max_version);
1597 }
1598 
1599 inline bool ExpectOpVersion(int curr_version, int max_version,
1600                             OpValidationContext* val_ctx) {
1601   return ExpectFmt(curr_version <= max_version, val_ctx,
1602                    NNAPIValidationFailureType::kUnsupportedOperatorVersion,
1603                    "OP Version different from %d", max_version);
1604 }
1605 
1606 inline bool ExpectIsFloatOperator(const TfLiteContext* context,
1607                                   const TfLiteNode* node,
1608                                   OpValidationContext* val_ctx) {
1609   const auto input_type = context->tensors[node->inputs->data[0]].type;
1610   return Expect(IsFloat(input_type),
1611                 NNAPIValidationFailureType::kUnsupportedInputType,
1612                 "Input should be Float", val_ctx);
1613 }
1614 
1615 bool ExpectIsFloatOrUint8Operator(const TfLiteContext* context,
1616                                   const TfLiteNode* node,
1617                                   OpValidationContext* val_ctx) {
1618   const auto input_type = context->tensors[node->inputs->data[0]].type;
1619   return Expect(IsFloatOrUInt8(input_type),
1620                 NNAPIValidationFailureType::kUnsupportedInputType,
1621                 "Input should be Float or UINT8", val_ctx);
1622 }
1623 
1624 bool ExpectIsFloatOrQuant8Operator(const TfLiteContext* context,
1625                                    const TfLiteNode* node,
1626                                    OpValidationContext* val_ctx) {
1627   const auto input_type = context->tensors[node->inputs->data[0]].type;
1628   return Expect(IsFloatOrQuantized(input_type),
1629                 NNAPIValidationFailureType::kUnsupportedInputType,
1630                 "Input should be Float or Quant8", val_ctx);
1631 }
1632 
1633 bool ExpectIsFloatOrInt32Operator(const TfLiteContext* context,
1634                                   const TfLiteNode* node,
1635                                   OpValidationContext* val_ctx) {
1636   const auto input_type = context->tensors[node->inputs->data[0]].type;
1637   return Expect(IsFloatOrInt32(input_type),
1638                 NNAPIValidationFailureType::kUnsupportedInputType,
1639                 "Input should be Float or Int32", val_ctx);
1640 }
1641 
1642 bool ExpectIsFloatQuant8OrInt32Operator(const TfLiteContext* context,
1643                                         const TfLiteNode* node,
1644                                         OpValidationContext* val_ctx) {
1645   const auto input_type = context->tensors[node->inputs->data[0]].type;
1646   return Expect(IsFloatQuantizedOrInt32(input_type),
1647                 NNAPIValidationFailureType::kUnsupportedInputType,
1648                 "Input should be Float, Quant8, or Int32", val_ctx);
1649 }
1650 
1651 // When using NN API version 1.0 or 1.1, the condition below must be true for
1653 // quantized versions of the following ops:
1654 // * CONV_2D
1655 // * DEPTHWISE_CONV_2D
1656 // * FULLY_CONNECTED (where filter actually stands for weights)
1657 // The condition is relaxed and no longer required since version 1.2.
1658 bool ExpectIsRestrictedScalesCompliant(const TfLiteContext* context,
1659                                        const TfLiteNode* node,
1660                                        OpValidationContext* val_ctx) {
1661   const int input_id = node->inputs->data[0];
1662   const int filter_id = node->inputs->data[1];
1663   const int output_id = node->outputs->data[0];
1664   const float input_scale = context->tensors[input_id].params.scale;
1665   const float filter_scale = context->tensors[filter_id].params.scale;
1666   const float output_scale = context->tensors[output_id].params.scale;
1667   return Expect(input_scale * filter_scale < output_scale,
1668                 NNAPIValidationFailureType::kNotRestrictedScaleCompliant,
1669                 "When using NN API version 1.0 or 1.1, input_scale * "
1670                 "filter_scale must be less than output_scale.",
1671                 val_ctx);
1672 }
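// Worked example for the check above (editorial note): with
// input_scale = 0.5f, filter_scale = 0.5f and output_scale = 0.3f the product
// 0.25f < 0.3f satisfies the pre-1.2 restriction, whereas output_scale = 0.2f
// would fail validation and keep the op on the TFLite kernels for those NNAPI
// versions.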
1673 
1674 }  // namespace
1675 
1676 // Returns true if the given node can be mapped to NNAPI for the given
1677 // builtin code, operator version and Android SDK version; otherwise returns
1678 // false and, when provided, appends the reasons to `map_failures`.
1679 bool NNAPIDelegateKernel::Validate(
1680     const TfLiteContext* context, int builtin_code, int version,
1681     int android_sdk_version, const TfLiteNode* node,
1682     bool is_accelerator_specified,
1683     std::vector<NNAPIValidationFailure>* map_failures) {
1684   OpValidationContext val_ctx{true, map_failures};
1685   switch (builtin_code) {
1686     case kTfLiteBuiltinAdd: {
1687       ExpectMaxOpVersion(version, 2, &val_ctx);
1688       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
1689         ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
1690         if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
1691           Expect(reinterpret_cast<TfLiteAddParams*>(node->builtin_data)
1692                          ->activation == kTfLiteActNone,
1693                  NNAPIValidationFailureType::kNoActivationExpected,
1694                  "No activation function supported", &val_ctx);
1695         }
1696       } else {
1697         ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1698       }
1699     } break;
1700     case kTfLiteBuiltinArgMax:
1701     case kTfLiteBuiltinArgMin: {
1702       ExpectMaxOpVersion(version, 2, &val_ctx);
1703       // Those operators were introduced in NNAPI 1.2.
1704       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
1705                                  &val_ctx);
1706       const TfLiteType input_type =
1707           context->tensors[node->inputs->data[(0)]].type;
1708       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
1709                            kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
1710 
1711       const auto& axis_tensor = context->tensors[node->inputs->data[1]];
1712       if (axis_tensor.type == kTfLiteInt64) {
1713         Expect(
1714             axis_tensor.allocation_type == kTfLiteMmapRo &&
1715                 *axis_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
1716                 *axis_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
1717             NNAPIValidationFailureType::kUnsupportedInputType,
1718             "NNAPI only supports axis as int32. If the axis type is int64 and "
1719             "constant we can convert it to int32 if the value isn't too "
1720             "large.",
1721             &val_ctx);
1722       } else {
1723         Expect(axis_tensor.type == kTfLiteInt32,
1724                NNAPIValidationFailureType::kUnsupportedInputType,
1725                "Axis should be Int32", &val_ctx);
1726       }
1727       if (builtin_code == kTfLiteBuiltinArgMax) {
1728         auto builtin =
1729             reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data);
1730         Expect(builtin->output_type == kTfLiteInt32,
1731                NNAPIValidationFailureType::kUnsupportedOutputType,
1732                "NNAPI only supports int32 output.", &val_ctx);
1733       } else {
1734         auto builtin =
1735             reinterpret_cast<TfLiteArgMinParams*>(node->builtin_data);
1736         Expect(builtin->output_type == kTfLiteInt32,
1737                NNAPIValidationFailureType::kUnsupportedOutputType,
1738                "NNAPI only supports int32 output.", &val_ctx);
1739       }
1740     } break;
1741     case kTfLiteBuiltinMul: {
1742       ExpectMaxOpVersion(version, 2, &val_ctx);
1743       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
1744         ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
1745         if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
1746           Expect(reinterpret_cast<TfLiteMulParams*>(node->builtin_data)
1747                          ->activation == kTfLiteActNone,
1748                  NNAPIValidationFailureType::kNoActivationExpected,
1749                  "No activation function supported", &val_ctx);
1750         }
1751       } else {
1752         ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1753       }
1754     } break;
1755     case kTfLiteBuiltinAveragePool2d: {
1756       ExpectMaxOpVersion(version, 2, &val_ctx);
1757       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1758       auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
1759       // TODO(b/138756912): Large filter window would overflow on the
1760       // quantized reference CPU path.
1761       if (IsQuantized(context->tensors[node->inputs->data[0]].type)) {
1762         Expect(is_accelerator_specified ||
1763                    (builtin->filter_width * builtin->filter_height <= 256),
1764                NNAPIValidationFailureType::kUnsupportedOperandSize,
1765                "Large filter window would overflow on the reference CPU path",
1766                &val_ctx);
1767       }
1768     } break;
1769     case kTfLiteBuiltinMaxPool2d: {
1770       ExpectMaxOpVersion(version, 2, &val_ctx);
1771       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1772     } break;
1773     case kTfLiteBuiltinL2Pool2d: {
1774       ExpectOpVersion(version, 1, &val_ctx);
1775       ExpectIsFloatOperator(context, node, &val_ctx);
1776 
1777       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1778         auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
1779         Expect(builtin->activation == kTfLiteActNone,
1780                NNAPIValidationFailureType::kUnsupportedOperandValue,
1781                "Before NNAPI 1.2 fused activation for l2_pool may not be "
1782                "supported.",
1783                &val_ctx);
1784       }
1785     } break;
1786     case kTfLiteBuiltinConv2d: {
1787       ExpectMaxOpVersion(version, 3, &val_ctx);
1788       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1789         Expect(!IsHybridOperator(context, builtin_code, node),
1790                NNAPIValidationFailureType::kUnsupportedHybridOperator,
1791                "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
1792         ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
1793 
1794         const auto& filter_tensor = context->tensors[node->inputs->data[1]];
1795         if (filter_tensor.quantization.type == kTfLiteAffineQuantization) {
1796           TfLiteAffineQuantization* quantization_params =
1797               static_cast<TfLiteAffineQuantization*>(
1798                   filter_tensor.quantization.params);
1799           Expect(quantization_params->scale->size <= 1,
1800                  NNAPIValidationFailureType::kUnsupportedQuantizationType,
1801                  "Per-channel quantized convolution not supported before NNAPI "
1802                  "1.2.",
1803                  &val_ctx);
1804         }
1805       }
1806       const auto input_type = context->tensors[node->inputs->data[0]].type;
1807       if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
1808           input_type == kTfLiteUInt8) {
1809         ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
1810       }
1811       auto builtin = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
1812       // TODO(b/132950584): Add support for Conv2D with omitted bias.
1813       Expect(node->inputs->size == 3,
1814              NNAPIValidationFailureType::kMissingRequiredOperand,
1815              "Conv2D with omitted bias not supported", &val_ctx);
1816       if (builtin->dilation_width_factor != 1 ||
1817           builtin->dilation_height_factor != 1) {
1818         Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
1819                NNAPIValidationFailureType::kUnsupportedOperandValue,
1820                "NNAPI supports dilated Conv2D since NNAPI 1.2.", &val_ctx);
1821       }
1822     } break;
1823     case kTfLiteBuiltinDepthwiseConv2d: {
1824       ExpectMaxOpVersion(version, 3, &val_ctx);
1825 
1826       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1827         ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
1828 
1829         const auto input_type = context->tensors[node->inputs->data[0]].type;
1830         if (input_type == kTfLiteUInt8) {
1831           ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
1832         }
1833 
1834         auto builtin =
1835             reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
1836         Expect(builtin->dilation_width_factor == 1 &&
1837                    builtin->dilation_height_factor == 1,
1838                NNAPIValidationFailureType::kUnsupportedOperandValue,
1839                "dilation_width_factor and dilation_height_factor expected to "
1840                "be equal to 1",
1841                &val_ctx);
1842       }
1843     } break;
1844     case kTfLiteBuiltinFullyConnected: {
1845       ExpectMaxOpVersion(version, 5, &val_ctx);
1846       // TODO(b/132950584): Add support for FullyConnected with no bias.
1847       Expect(node->inputs->size == 3 &&
1848                  node->inputs->data[2] != kTfLiteOptionalTensor,
1849              NNAPIValidationFailureType::kMissingRequiredOperand,
1850              "FullyConnected with no bias not supported", &val_ctx);
1851       const auto output_type = context->tensors[node->outputs->data[0]].type;
1852       Expect(output_type != kTfLiteInt16,
1853              NNAPIValidationFailureType::kUnsupportedOutputType,
1854              "Unsupported output of type kTfLiteInt16", &val_ctx);
1855       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1856         Expect(!IsHybridOperator(context, builtin_code, node),
1857                NNAPIValidationFailureType::kUnsupportedHybridOperator,
1858                "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
1859         ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
1860       }
1861       const auto input_type = context->tensors[node->inputs->data[0]].type;
1862       if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
1863           input_type == kTfLiteUInt8) {
1864         ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
1865       }
1866       auto builtin =
1867           reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
1868       Expect(!builtin->keep_num_dims,
1869              NNAPIValidationFailureType::kUnsupportedOperandValue,
1870              "keep_num_dims == true not supported", &val_ctx);
1871     } break;
1872     case kTfLiteBuiltinHardSwish: {
1873       // Hardswish is supported here; on pre-Q devices it is decomposed
1874       // into basic ops. For some NNAPI accelerators, the optimized TFLite
1875       // kernels may still be faster than this decomposition.
1876       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1877     } break;
1878     case kTfLiteBuiltinSoftmax: {
1879       ExpectOpVersion(version, 2, &val_ctx);
1880       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1881       const auto& output = context->tensors[node->outputs->data[0]];
1882       ExpectTypeIn(output.type, {kTfLiteFloat32, kTfLiteUInt8, kTfLiteInt8},
1883                    NNAPIValidationFailureType::kUnsupportedOutputType,
1884                    "Output type should be one of kTfLiteFloat32, kTfLiteUInt8, "
1885                    "kTfLiteInt8.",
1886                    &val_ctx);
1887       const auto& input = context->tensors[node->inputs->data[0]];
1888       const int input_rank = input.dims->size;
1889       Expect(input_rank <= 4,
1890              NNAPIValidationFailureType::kUnsupportedOperandRank,
1891              "Input rank should be <= 4", &val_ctx);
1892       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1893         Expect(
1894             input_rank == 2 || input_rank == 4,
1895             NNAPIValidationFailureType::kUnsupportedOperandRank,
1896             "Before API level 29 only 2D and 4D input tensors were supported.",
1897             &val_ctx);
1898       }
1899     } break;
1900     case kTfLiteBuiltinReshape: {
1901       ExpectOpVersion(version, 1, &val_ctx);
1902       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1903       if (node->inputs->size >= 2) {
1904         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
1905                    kTfLiteMmapRo,
1906                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
1907                "The shape input tensor must be constant.", &val_ctx);
1908       }
1909       if (node->inputs->size == 1) {
1910         // reject scalar reshaping
1911         auto* params =
1912             reinterpret_cast<TfLiteReshapeParams*>(node->builtin_data);
1913         int num_dimensions = params->num_dimensions;
1914         if (num_dimensions == 1 && params->shape[0] == 0) {
1915           // Legacy tflite models use a shape parameter of [0] to indicate
1916           // scalars.
1917           num_dimensions = 0;
1918         }
1919         Expect(num_dimensions > 0,
1920                NNAPIValidationFailureType::kUnsupportedOperandRank,
1921                "New shape rank should be > 0", &val_ctx);
1922       }
1923     } break;
1924     case kTfLiteBuiltinResizeBilinear: {
1925       ExpectMaxOpVersion(version, 3, &val_ctx);
1926       const auto& input = context->tensors[node->inputs->data[0]];
1927       const auto output_dims = context->tensors[node->outputs->data[0]].dims;
1928       Expect(input.dims->size == 4,
1929              NNAPIValidationFailureType::kUnsupportedOperandRank,
1930              "Input should have rank 4", &val_ctx);
1931       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1932       Expect(node->inputs->size >= 2,
1933              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
1934              "Expected at least 2 inputs", &val_ctx);
1935       if (node->inputs->size >= 2) {
1936         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
1937                    kTfLiteMmapRo,
1938                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
1939                "The size input tensor must be constant.", &val_ctx);
1940       }
1941       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1942         Expect(output_dims->data[1] == output_dims->data[2],
1943                NNAPIValidationFailureType::kUnsupportedOperandValue,
1944                "Require width == height due to driver differences in NNAPI "
1945                "< 1.2",
1946                &val_ctx);
1947       }
1948       auto builtin =
1949           reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
1950       if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
1951         Expect(!builtin->align_corners,
1952                NNAPIValidationFailureType::kUnsupportedOperandValue,
1953                "NNAPI does not support align_corners == true.", &val_ctx);
1954         Expect(!builtin->half_pixel_centers,
1955                NNAPIValidationFailureType::kUnsupportedOperandValue,
1956                "NNAPI does not support half_pixel_centers == true.", &val_ctx);
1957       }
1958       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1959         Expect(input.type == kTfLiteFloat32,
1960                NNAPIValidationFailureType::kUnsupportedInputType,
1961                "NNAPI 1.0 & 1.1 only supports float input.", &val_ctx);
1962       }
1963     } break;
1964     case kTfLiteBuiltinResizeNearestNeighbor: {
1965       ExpectMaxOpVersion(version, 3, &val_ctx);
1966       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
1967                                  &val_ctx);
1968       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1969       Expect(node->inputs->size >= 2,
1970              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
1971              "Expected at least 2 inputs", &val_ctx);
1972       if (node->inputs->size >= 2) {
1973         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
1974                    kTfLiteMmapRo,
1975                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
1976                "The size input tensor must be constant.", &val_ctx);
1977       }
1978       auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
1979           node->builtin_data);
1980       if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
1981         Expect(!builtin->align_corners,
1982                NNAPIValidationFailureType::kUnsupportedOperandValue,
1983                "NNAPI does not support align_corners == true.", &val_ctx);
1984         Expect(!builtin->half_pixel_centers,
1985                NNAPIValidationFailureType::kUnsupportedOperandValue,
1986                "NNAPI does not support half_pixel_centers == true.", &val_ctx);
1987       }
1988     } break;
1989     case kTfLiteBuiltinSqueeze: {
1990       ExpectOpVersion(version, 1, &val_ctx);
1991       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
1992                                  &val_ctx);
1993       auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
1994       if (android_sdk_version == kMinSdkVersionForNNAPI11) {
1995         Expect(builtin->num_squeeze_dims != 0,
1996                NNAPIValidationFailureType::kUnsupportedOperandValue,
1997                "NNAPI 1.1 does not support null squeeze_dims properly.",
1998                &val_ctx);
1999       }
2000     } break;
2001     case kTfLiteBuiltinUnidirectionalSequenceLstm: {
2002       ExpectMaxOpVersion(version, 2, &val_ctx);
2003       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2004                                  &val_ctx);
2005 
2006       Expect(!IsHybridOperator(context, builtin_code, node),
2007              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2008              "Hybrid version of this op is not supported by NN API.", &val_ctx);
2009 
2010       Expect(node->inputs->size == 20 || node->inputs->size == 24,
2011              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2012              "Only operations with 20 or 24 inputs are supported", &val_ctx);
2013     } break;
2014     case kTfLiteBuiltinL2Normalization: {
2015       ExpectMaxOpVersion(version, 2, &val_ctx);
2016 
2017       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2018         ExpectIsFloatOperator(context, node, &val_ctx);
2019 
2020         const auto& input = context->tensors[node->inputs->data[0]];
2021         Expect(input.dims->size == 4,
2022                NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2023                "Expected input of rank 4", &val_ctx);
2024       }
2025       auto builtin = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
2026       Expect(builtin->activation == kTfLiteActNone,
2027              NNAPIValidationFailureType::kNoActivationExpected,
2028              "Expected no activation", &val_ctx);
2029     } break;
2030     case kTfLiteBuiltinLocalResponseNormalization: {
2031       ExpectOpVersion(version, 1, &val_ctx);
2032     } break;
2033     case kTfLiteBuiltinLshProjection: {
2034       ExpectOpVersion(version, 1, &val_ctx);
2035 
2036       if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
2037               ->type == kTfLiteLshProjectionSparse) {
2038         // NNAPI does not support sparse projection correctly pre-Q
2039         // (b/111751836).
2040         Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2041                NNAPIValidationFailureType::kUnsupportedInputType,
2042                "NNAPI does not support sparse projection correctly pre-Q",
2043                &val_ctx);
2044         Expect(node->inputs->size == 2,
2045                NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2046                "NNAPI does not support weights for sparse projection.",
2047                &val_ctx);
2048       }
2049     } break;
2050     case kTfLiteBuiltinConcatenation: {
2051       ExpectMaxOpVersion(version, 2, &val_ctx);
2052       Expect(reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
2053                      ->activation == kTfLiteActNone,
2054              NNAPIValidationFailureType::kNoActivationExpected,
2055              "No activation function supported", &val_ctx);
2056       Expect(context->tensors[node->inputs->data[0]].dims->size <= 4,
2057              NNAPIValidationFailureType::kUnsupportedOperandRank,
2058              "Input rank should be at most 4", &val_ctx);
2059 
2060       const auto& input_type = context->tensors[node->inputs->data[0]].type;
2061       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
2062                            kTfLiteUInt8, kTfLiteInt8);
2063 
2064       if (input_type == kTfLiteUInt8 &&
2065           android_sdk_version < kMinSdkVersionForNNAPI12) {
2066         auto first_param = context->tensors[node->inputs->data[0]].params;
2067         for (int i = 1; i < node->inputs->size; i++) {
2068           auto curr_param = context->tensors[node->inputs->data[i]].params;
2069           if (!Expect(curr_param.scale == first_param.scale &&
2070                           curr_param.zero_point == first_param.zero_point,
2071                       NNAPIValidationFailureType::kUnsupportedOperandValue,
2072                       "NNAPI 1.0 and 1.1 only support concatenating "
2073                       "quantized tensors with the same scale and offset.",
2074                       &val_ctx)) {
2075             break;
2076           }
2077         }
2078       }
2079     } break;
2080     case kTfLiteBuiltinDequantize: {
2081       Expect(version == 1 || version == 2,
2082              NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2083              "Supported op versions are 1 and 2 only", &val_ctx);
2084 
2085       const auto& input = context->tensors[node->inputs->data[0]];
2086       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2087         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
2088       } else {
2089         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
2090 
2091         if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
2092             input.type == kTfLiteInt8) {
2093           const auto zero_point = input.params.zero_point;
2094           Expect(zero_point == 0,
2095                  NNAPIValidationFailureType::kUnsupportedInputType,
2096                  "NN API supports int8 type since version 1.2 but only for "
2097                  "symmetric quantization.",
2098                  &val_ctx);
2099         }
2100       }
2101     } break;
2102     case kTfLiteBuiltinFloor: {
2103       ExpectOpVersion(version, 1, &val_ctx);
2104     } break;
2105     case kTfLiteBuiltinRelu:
2106     case kTfLiteBuiltinReluN1To1:
2107     case kTfLiteBuiltinRelu6:
2108     case kTfLiteBuiltinLogistic: {
2109       ExpectMaxOpVersion(version, 2, &val_ctx);
2110       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2111     } break;
2112     case kTfLiteBuiltinTanh: {
2113       ExpectMaxOpVersion(version, 2, &val_ctx);
2114       const TfLiteType input_type =
2115           context->tensors[node->inputs->data[0]].type;
2116       Expect(IsFloat(input_type) ||
2117                  (IsQuantized(input_type) &&
2118                   android_sdk_version >= kMinSdkVersionForNNAPI12),
2119              NNAPIValidationFailureType::kUnsupportedInputType,
2120              "NNAPI only supports float tanh before NNAPI 1.2.", &val_ctx);
2121     } break;
2122     case kTfLiteBuiltinSub: {
2123       ExpectMaxOpVersion(version, 3, &val_ctx);
2124       const TfLiteType input_type =
2125           context->tensors[node->inputs->data[0]].type;
2126       Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 &&
2127               IsFloat(input_type)) ||
2128                  (android_sdk_version >= kMinSdkVersionForNNAPI12 &&
2129                   IsQuantized(input_type)) ||
2130                  (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2131                   IsInt32(input_type)),
2132              NNAPIValidationFailureType::kUnsupportedInputType,
2133              "Unsupported SUB input type for this SDK version.", &val_ctx);
2134       if (IsInt32(input_type)) {
2135         Expect(reinterpret_cast<TfLiteSubParams*>(node->builtin_data)
2136                        ->activation == kTfLiteActNone,
2137                NNAPIValidationFailureType::kNoActivationExpected,
2138                "No activation function supported", &val_ctx);
2139       }
2140       const int input0_rank =
2141           context->tensors[node->inputs->data[0]].dims->size;
2142       const int input1_rank =
2143           context->tensors[node->inputs->data[1]].dims->size;
2144       Expect(input0_rank <= 4 && input1_rank <= 4,
2145              NNAPIValidationFailureType::kUnsupportedOperandRank,
2146              "Input rank must be <= 4", &val_ctx);
2147     } break;
2148     case kTfLiteBuiltinDiv: {
2149       ExpectOpVersion(version, 1, &val_ctx);
2150       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2151                                  &val_ctx);
2152       Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2153              NNAPIValidationFailureType::kUnsupportedInputType,
2154              "NNAPI only supports float div.", &val_ctx);
2155     } break;
2156     case kTfLiteBuiltinPad:
2157     case kTfLiteBuiltinPadv2: {
2158       ExpectMaxOpVersion(version, 2, &val_ctx);
2159       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2160       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2161                                  &val_ctx);
2162 
2163       const TfLiteIntArrayView input_shape(
2164           context->tensors[node->inputs->data[0]].dims);
2165       Expect(!HasZeroes(input_shape),
2166              NNAPIValidationFailureType::kUnsupportedOperandValue,
2167              "NN API pad ops do not support input tensors with no elements",
2168              &val_ctx);
2169 
2170       Expect(node->inputs->size >= 2,
2171              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2172              "Expecting at least 2 inputs", &val_ctx);
2173 
2174       if (node->inputs->size == 3) {
2175         // This is going to be mapped with a PadV2
2176         Expect(
2177             android_sdk_version >= kMinSdkVersionForNNAPI12,
2178             NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2179             "Specification of the padding value is supported from NNAPI 1.2.",
2180             &val_ctx);
2181       } else {  // this is going to be mapped as Pad
2182         if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2183           Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2184                  NNAPIValidationFailureType::kUnsupportedInputType,
2185                  "Only Float32 inputs are supported before NNAPI 1.2",
2186                  &val_ctx);
2187         }
2188       }
2189     } break;
2190     case kTfLiteBuiltinUnidirectionalSequenceRnn: {
2191       ExpectOpVersion(version, 1, &val_ctx);
2192       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2193                                  &val_ctx);
2194       Expect(!IsHybridOperator(context, builtin_code, node),
2195              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2196              "Hybrid version of this op is not supported by NN API.", &val_ctx);
2197     } break;
2198     case kTfLiteBuiltinSpaceToBatchNd: {
2199       ExpectMaxOpVersion(version, 2, &val_ctx);
2200       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2201                                  &val_ctx);
2202     } break;
2203     case kTfLiteBuiltinBatchToSpaceNd: {
2204       ExpectMaxOpVersion(version, 2, &val_ctx);
2205       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2206                                  &val_ctx);
2207       auto crops = context->tensors[node->inputs->data[2]];
2208       auto crops_data = crops.data.i32;
2209       Expect(crops_data && crops.bytes == 16 && crops_data[0] == 0 &&
2210                  crops_data[1] == 0 && crops_data[2] == 0 && crops_data[3] == 0,
2211              NNAPIValidationFailureType::kUnsupportedOperandValue,
2212              "All crops should be 0.", &val_ctx);
2213     } break;
2214     case kTfLiteBuiltinStridedSlice: {
2215       ExpectMaxOpVersion(version, 2, &val_ctx);
2216       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2217                                  &val_ctx);
2218     } break;
2219     case kTfLiteBuiltinTranspose: {
2220       ExpectMaxOpVersion(version, 2, &val_ctx);
2221       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2222                                  &val_ctx);
2223       // Note that the permutation input tensor value dictates the output
2224       // dimensions.
2225       // TODO(b/110888333): Support dynamically-sized tensors in delegates.
2226       Expect((node->inputs->size > 1) &&
2227                  (context->tensors[node->inputs->data[1]].allocation_type ==
2228                   kTfLiteMmapRo),
2229              NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2230              "Dynamically-sized tensors not supported.", &val_ctx);
2231     } break;
2232     case kTfLiteBuiltinAbs:
2233     case kTfLiteBuiltinExp:
2234     case kTfLiteBuiltinLog:
2235     case kTfLiteBuiltinRsqrt:
2236     case kTfLiteBuiltinPow: {
2237       ExpectOpVersion(version, 1, &val_ctx);
2238       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2239                                  &val_ctx);
2240       ExpectIsFloatOperator(context, node, &val_ctx);
2241     } break;
2242     case kTfLiteBuiltinSlice: {
2243       ExpectMaxOpVersion(version, 2, &val_ctx);
2244       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2245                                  &val_ctx);
2246       const auto input_type = context->tensors[node->inputs->data[0]].type;
2247       const auto begin_type = context->tensors[node->inputs->data[1]].type;
2248       const auto size_type = context->tensors[node->inputs->data[2]].type;
2249       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2250                            kTfLiteUInt8, kTfLiteInt8);
2251       Expect(begin_type == kTfLiteInt32,
2252              NNAPIValidationFailureType::kUnsupportedInputType,
2253              "Begin type should be Int32", &val_ctx);
2254       Expect(size_type == kTfLiteInt32,
2255              NNAPIValidationFailureType::kUnsupportedInputType,
2256              "Size type should be Int32", &val_ctx);
2257     } break;
2258     case kTfLiteBuiltinSin: {
2259       ExpectOpVersion(version, 1, &val_ctx);
2260       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2261                                  &val_ctx);
2262       ExpectIsFloatOperator(context, node, &val_ctx);
2263     } break;
2264     case kTfLiteBuiltinTransposeConv: {
2265       ExpectMaxOpVersion(version, 2, &val_ctx);
2266       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2267                                  &val_ctx);
2268       Expect((node->inputs->size > 1) &&
2269                  (context->tensors[node->inputs->data[0]].allocation_type ==
2270                   kTfLiteMmapRo) &&
2271                  (context->tensors[node->inputs->data[1]].allocation_type ==
2272                   kTfLiteMmapRo),
2273              NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2274              "Dynamically-sized tensors not supported.", &val_ctx);
2275     } break;
2276     case kTfLiteBuiltinSqrt: {
2277       ExpectOpVersion(version, 1, &val_ctx);
2278       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2279                                  &val_ctx);
2280       ExpectIsFloatOperator(context, node, &val_ctx);
2281     } break;
2282     case kTfLiteBuiltinRnn: {
2283       ExpectOpVersion(version, 1, &val_ctx);
2284       Expect(node->inputs->size == 5,
2285              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2286              "Expected 5 inputs", &val_ctx);
2287       if (node->inputs->size >= 2) {
2288         Expect(
2289             context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2290                 kTfLiteFloat32,
2291             NNAPIValidationFailureType::kUnsupportedInputType,
2292             "NNAPI only supports float32 weights.", &val_ctx);
2293       }
2294     } break;
2295     case kTfLiteBuiltinSpaceToDepth: {
2296       ExpectMaxOpVersion(version, 2, &val_ctx);
2297       const TfLiteType input_type =
2298           context->tensors[node->inputs->data[0]].type;
2299       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2300                            kTfLiteInt8);
2301     } break;
2302     case kTfLiteBuiltinSvdf: {
2303       ExpectOpVersion(version, 1, &val_ctx);
2304       Expect(node->inputs->size == 5,
2305              NNAPIValidationFailureType::kUnsupportedOperandRank,
2306              "Expected 5 inputs", &val_ctx);
2307       if (node->inputs->size >= 2) {
2308         Expect(
2309             context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2310                 kTfLiteFloat32,
2311             NNAPIValidationFailureType::kUnsupportedInputType,
2312             "NNAPI only supports float32 weights.", &val_ctx);
2313       }
2314       Expect(android_sdk_version >= kMinSdkVersionForNNAPI11,
2315              NNAPIValidationFailureType::kUnsupportedOperandRank,
2316              "SVDF does not support rank > 1 on NNAPI 1.0.", &val_ctx);
2317       Expect(context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
2318                      .type == kTfLiteFloat32,
2319              NNAPIValidationFailureType::kUnsupportedInputType,
2320              "Weights should be Float32", &val_ctx);
2321     } break;
2322     case kTfLiteBuiltinLstm: {
2323       ExpectMaxOpVersion(version, 3, &val_ctx);
2324       Expect(
2325           android_sdk_version >= kMinSdkVersionForNNAPI11,
2326           NNAPIValidationFailureType::kUnsupportedAndroidVersion,
2327           "NNAPI 1.0 has a bug with optional tensors that affects LSTM.",
2328           &val_ctx);
2329       Expect(android_sdk_version >= kMinSdkVersionForNNAPI12 ||
2330                  !IsHybridOperator(context, builtin_code, node),
2331              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2332              "Hybrid operators not supported before NNAPI 1.2.", &val_ctx);
2333 
2334       const auto weight_input_index =
2335           isLstmBasicKernel(node) ? 2 /*  basic::kInputWeights */
2336                                   : 4 /* full::kInputToOutputWeightsTensor */;
2337 
2338       const TfLiteType weight_type =
2339           context->tensors[node->inputs->data[weight_input_index]].type;
2340 
2341       if (isLstmBasicKernel(node)) {
2342         Expect(weight_type == kTfLiteUInt8,
2343                NNAPIValidationFailureType::kUnsupportedInputType,
2344                "Basic LSTM Kernels support only UINT8 weights", &val_ctx);
2345 
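             // The quantization checks below mirror the fixed operand formats
             // of ANEURALNETWORKS_QUANTIZED_16BIT_LSTM: 8-bit activations use
             // scale 1/128 with zero point 128 (real range roughly [-1, 1)),
             // and the 16-bit cell state uses scale 16/32768 = 2^-11 with
             // zero point 0 (real range roughly [-16, 16)).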
2346         const auto input_quantization_params =
2347             context->tensors[node->inputs->data[0]].params;
2348         Expect(input_quantization_params.scale == 1. / 128. &&
2349                    input_quantization_params.zero_point == 128,
2350                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2351                "Invalid input quantization", &val_ctx);
2352 
2353         const auto output_quantization_params =
2354             context->tensors[node->outputs->data[0]].params;
2355         Expect(output_quantization_params.scale == 1. / 128. &&
2356                    output_quantization_params.zero_point == 128,
2357                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2358                "Invalid output quantization", &val_ctx);
2359 
2360         const auto cell_state_quantization_params =
2361             context->tensors[node->outputs->data[1]].params;
2362         Expect(cell_state_quantization_params.scale == 16. / 32768. &&
2363                    cell_state_quantization_params.zero_point == 0,
2364                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2365                "Invalid cell state quantization", &val_ctx);
2366 
2367         auto is_const_tensor = [&node, &context](int tensor_idx) {
2368           return context->tensors[node->inputs->data[tensor_idx]]
2369                      .allocation_type == kTfLiteMmapRo;
2370         };
2371 
2372         Expect(is_const_tensor(2 /* kInputWeights */),
2373                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2374                "Weights tensor should be constant", &val_ctx);
2375         Expect(is_const_tensor(3 /* kInputBiases */),
2376                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2377                "Biases tensor should be constant", &val_ctx);
2378 
2379         return val_ctx.is_valid;
2380       } else {
2381         if (node->inputs->size == 24) {
2382           ExpectMinAndroidSdkVersion(android_sdk_version,
2383                                      kMinSdkVersionForNNAPI12, &val_ctx);
2384         }
2385 
2386         if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2387           Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8 ||
2388                      weight_type == kTfLiteInt8,
2389                  NNAPIValidationFailureType::kUnsupportedInputType,
2390                  "Weight has to be Float32 or UINT8 or INT8", &val_ctx);
2391         } else {
2392           Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8,
2393                  NNAPIValidationFailureType::kUnsupportedInputType,
2394                  "Weight has to be Float32 or UINT8", &val_ctx);
2395         }
2396       }
2397     } break;
2398     case kTfLiteBuiltinMean: {
2399       ExpectMaxOpVersion(version, 2, &val_ctx);
2400       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2401                                  &val_ctx);
2402       if (android_sdk_version >= kMinSdkVersionForNNAPI12) {
2403         Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 ||
2404                    IsQuantized(context->tensors[node->inputs->data[0]].type),
2405                NNAPIValidationFailureType::kUnsupportedInputType,
2406                "Expected Float32 or Quantized input", &val_ctx);
2407       } else {
2408         Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2409                NNAPIValidationFailureType::kUnsupportedInputType,
2410                "Expected Float32 input", &val_ctx);
2411       }
2412       Expect(context->tensors[node->outputs->data[0]].dims->size > 0,
2413              NNAPIValidationFailureType::kUnsupportedOutputType,
2414              "NNAPI does not support generating a scalar as output for MEAN.",
2415              &val_ctx);
2416 
2417       auto input_param = context->tensors[node->inputs->data[0]].params;
2418       auto output_param = context->tensors[node->outputs->data[0]].params;
2419       Expect(input_param.scale == output_param.scale &&
2420                  input_param.zero_point == output_param.zero_point,
2421              NNAPIValidationFailureType::kUnsupportedOutputType,
2422              "NNAPI requires that the input and output have the same "
2423              "quantization parameters.",
2424              &val_ctx);
2425     } break;
2426     case kTfLiteBuiltinEmbeddingLookup: {
2427       ExpectOpVersion(version, 1, &val_ctx);
2428       Expect(context->tensors[node->inputs->data[1]].type == kTfLiteFloat32,
2429              NNAPIValidationFailureType::kUnsupportedInputType,
2430              "NNAPI only supports float32 values.", &val_ctx);
2431     } break;
2432     case kTfLiteBuiltinHashtableLookup: {
2433       ExpectOpVersion(version, 1, &val_ctx);
2434       Expect(context->tensors[node->outputs->data[0]].type == kTfLiteFloat32,
2435              NNAPIValidationFailureType::kUnsupportedOutputType,
2436              "NNAPI only supports float32 output.", &val_ctx);
2437     } break;
2438     case kTfLiteBuiltinMaximum:
2439     case kTfLiteBuiltinMinimum: {
2440       ExpectMaxOpVersion(version, 3, &val_ctx);
2441       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2442                                  &val_ctx);
2443       const auto input_type = context->tensors[node->inputs->data[0]].type;
2444       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2445                            kTfLiteInt8, kTfLiteInt32);
2446       const TfLiteTensor& operand0 = context->tensors[node->inputs->data[0]];
2447       if (operand0.dims->size == 0) {
2448         Expect(operand0.allocation_type == kTfLiteMmapRo,
2449                NNAPIValidationFailureType::kUnsupportedInputType,
2450                "Scalar operand should be constant", &val_ctx);
2451       }
2452       const TfLiteTensor& operand1 = context->tensors[node->inputs->data[1]];
2453       if (operand1.dims->size == 0) {
2454         Expect(operand1.allocation_type == kTfLiteMmapRo,
2455                NNAPIValidationFailureType::kUnsupportedInputType,
2456                "Scalar operand should be constant", &val_ctx);
2457       }
2458     } break;
2459     case kTfLiteBuiltinCast: {
2460       ExpectOpVersion(version, 1, &val_ctx);
2461       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2462                                  &val_ctx);
2463       const TfLiteType input_type =
2464           context->tensors[node->inputs->data[0]].type;
2465       const TfLiteType output_type =
2466           context->tensors[node->outputs->data[0]].type;
2467       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2468         EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2469                              kTfLiteUInt8, kTfLiteInt8);
2470 
2471         ExpectTypeIn(
2472             output_type,
2473             {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
2474             NNAPIValidationFailureType::kUnsupportedOutputType,
2475             "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
2476             "kTfLiteUInt8, kTfLiteInt8.",
2477             &val_ctx);
2478       } else {
2479         EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2480                              kTfLiteUInt8);
2481 
2482         ExpectTypeIn(
2483             output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
2484             NNAPIValidationFailureType::kUnsupportedOutputType,
2485             "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
2486             "kTfLiteUInt8.",
2487             &val_ctx);
2488       }
2489     } break;
2490     case kTfLiteBuiltinLeakyRelu:
2491     case kTfLiteBuiltinPrelu: {
2492       ExpectOpVersion(version, 1, &val_ctx);
2493       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2494                                  &val_ctx);
2495       const auto input_type = context->tensors[node->inputs->data[0]].type;
2496       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2497                            kTfLiteInt8);
2498     } break;
2499     case kTfLiteBuiltinTile: {
2500       ExpectOpVersion(version, 1, &val_ctx);
2501       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2502                                  &val_ctx);
2503       const auto input_type = context->tensors[node->inputs->data[0]].type;
2504       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8,
2505                            kTfLiteUInt8, kTfLiteInt32);
2506       const auto multipliers_type =
2507           context->tensors[node->inputs->data[1]].type;
2508       Expect(multipliers_type == kTfLiteInt32,
2509              NNAPIValidationFailureType::kUnsupportedInputType,
2510              "Multipliers should be Int32", &val_ctx);
2511     } break;
2512     case kTfLiteBuiltinLogicalOr:
2513     case kTfLiteBuiltinLogicalAnd:
2514     case kTfLiteBuiltinLogicalNot: {
2515       ExpectOpVersion(version, 1, &val_ctx);
2516       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2517                                  &val_ctx);
2518       const auto input_type = context->tensors[node->inputs->data[0]].type;
2519       Expect(input_type == kTfLiteBool,
2520              NNAPIValidationFailureType::kUnsupportedInputType,
2521              "Input should be bool", &val_ctx);
2522     } break;
2523     case kTfLiteBuiltinLess:
2524     case kTfLiteBuiltinLessEqual:
2525     case kTfLiteBuiltinGreater:
2526     case kTfLiteBuiltinGreaterEqual:
2527     case kTfLiteBuiltinEqual:
2528     case kTfLiteBuiltinNotEqual: {
2529       ExpectMaxOpVersion(version, 2, &val_ctx);
2530       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2531                                  &val_ctx);
2532       const auto input_type = context->tensors[node->inputs->data[0]].type;
2533       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2534                            kTfLiteInt8, kTfLiteBool, kTfLiteInt32);
2535     } break;
2536     case kTfLiteBuiltinNeg: {
2537       ExpectMaxOpVersion(version, 2, &val_ctx);
2538       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2539                                  &val_ctx);
2540       const auto input_type = context->tensors[node->inputs->data[0]].type;
2541       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32);
2542     } break;
2543     case kTfLiteBuiltinTopkV2: {
2544       ExpectMaxOpVersion(version, 2, &val_ctx);
2545       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2546                                  &val_ctx);
2547       const auto& input_type = context->tensors[node->inputs->data[0]].type;
2548       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2549                            kTfLiteUInt8, kTfLiteInt8);
2550       const auto& k_param = context->tensors[node->inputs->data[1]];
2551       Expect(k_param.type == kTfLiteInt32 &&
2552                  k_param.allocation_type == kTfLiteMmapRo,
2553              NNAPIValidationFailureType::kUnsupportedInputType,
2554              "K param should be a constant of type Int32", &val_ctx);
2555     } break;
2556     case kTfLiteBuiltinSelect: {
2557       ExpectMaxOpVersion(version, 2, &val_ctx);
2558       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2559                                  &val_ctx);
2560       const auto value_type = context->tensors[node->inputs->data[1]].type;
2561       EXPECT_INPUT_TYPE_IN(value_type, kTfLiteFloat32, kTfLiteInt32,
2562                            kTfLiteUInt8, kTfLiteInt8);
2563       TfLiteIntArray* condition_shape =
2564           context->tensors[node->inputs->data[0]].dims;
2565       TfLiteIntArray* input_shape =
2566           context->tensors[node->inputs->data[1]].dims;
2567       Expect(TfLiteIntArrayEqual(condition_shape, input_shape),
2568              NNAPIValidationFailureType::kUnsupportedOperandValue,
2569              "Condition and input tensors should have the same shape",
2570              &val_ctx);
2571     } break;
2572     case kTfLiteBuiltinGather: {
2573       ExpectOpVersion(version, 2, &val_ctx);
2574       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2575                                  &val_ctx);
2576       const auto input_type = context->tensors[node->inputs->data[0]].type;
2577       const auto& positions = context->tensors[node->inputs->data[1]];
2578 
2579       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
2580                            kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2581 
2582       Expect(positions.type == kTfLiteInt32,
2583              NNAPIValidationFailureType::kUnsupportedInputType,
2584              "Positions type should be kTfLiteInt32", &val_ctx);
2585       Expect(positions.dims->size != 0,
2586              NNAPIValidationFailureType::kUnsupportedOperandRank,
2587              "0-dimension args are not supported by NNAPI.", &val_ctx);
2588     } break;
2589     case kTfLiteBuiltinBidirectionalSequenceLstm: {
2590       ExpectOpVersion(version, 1, &val_ctx);
2591       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2592                                  &val_ctx);
2593       Expect(!IsHybridOperator(context, builtin_code, node),
2594              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2595              "Hybrid version of this op is not supported by NN API.", &val_ctx);
2596     } break;
2597     case kTfLiteBuiltinExpandDims: {
2598       ExpectOpVersion(version, 1, &val_ctx);
2599       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2600                                  &val_ctx);
2601       const auto input_type = context->tensors[node->inputs->data[0]].type;
2602       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
2603                            kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2604       const auto axis = context->tensors[node->inputs->data[1]];
2605       Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
2606              NNAPIValidationFailureType::kUnsupportedInputType,
2607              "NNAPI only supports constant int32 axis tensor.", &val_ctx);
2608     } break;
2609     case kTfLiteBuiltinSplit: {
2610       ExpectOpVersion(version, 3, &val_ctx);
2611       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2612                                  &val_ctx);
2613       // Tensor indices: split_dim: 0, value: 1
2614       const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
2615       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2616         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
2617                              kTfLiteInt8, kTfLiteInt32);
2618       } else {
2619         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
2620                              kTfLiteInt32);
2621       }
2622       const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
2623       Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
2624              NNAPIValidationFailureType::kUnsupportedInputType,
2625              "NNAPI only supports constant int32 axis tensor.", &val_ctx);
2626     } break;
2627     case kTfLiteBuiltinLogSoftmax: {
2628       ExpectOpVersion(version, 1, &val_ctx);
2629       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2630                                  &val_ctx);
2631       const auto input_type = context->tensors[node->inputs->data[0]].type;
2632       Expect(input_type == kTfLiteFloat32,
2633              NNAPIValidationFailureType::kUnsupportedInputType,
2634              "Input should be Float32.", &val_ctx);
2635     } break;
2636     case kTfLiteBuiltinQuantize: {
2637       ExpectMaxOpVersion(version, 2, &val_ctx);
2638       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2639                                  &val_ctx);
2640       const auto value_type = context->tensors[node->inputs->data[0]].type;
2641       Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
2642              NNAPIValidationFailureType::kUnsupportedInputType,
2643              "Value should be quantized or Float32.", &val_ctx);
2644       if (IsQuantized(value_type)) {
2645         const auto quantization_params =
2646             context->tensors[node->inputs->data[0]].params;
2647         Expect(quantization_params.scale > 0.f,
2648                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2649                "Quantization scale should be > 0.", &val_ctx);
2650       }
2651       const auto output_type = context->tensors[node->outputs->data[0]].type;
2652       if (android_sdk_version < kMinSdkVersionForNNAPI13) {
2653         Expect(output_type == kTfLiteUInt8,
2654                NNAPIValidationFailureType::kUnsupportedOutputType,
2655                "Output should be kTfLiteUInt8.", &val_ctx);
2656       } else {
2657         ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
2658                      NNAPIValidationFailureType::kUnsupportedOutputType,
2659                      "Output should be kTfLiteUInt8 or kTfLiteInt8.", &val_ctx);
2660       }
2661       const auto quantization_params =
2662           context->tensors[node->outputs->data[0]].params;
2663       Expect(quantization_params.scale > 0.f,
2664              NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2665              "Quantization scale should be > 0.", &val_ctx);
2666     } break;
2667     case kTfLiteBuiltinReduceAny: {
2668       ExpectOpVersion(version, 2, &val_ctx);
2669       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2670                                  &val_ctx);
2671       Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
2672              NNAPIValidationFailureType::kUnsupportedOutputType,
2673              "NNAPI does not support generating a scalar as output.", &val_ctx);
2674     } break;
2675     case kTfLiteBuiltinReduceMin:
2676     case kTfLiteBuiltinReduceMax: {
2677       ExpectMaxOpVersion(version, 2, &val_ctx);
2678       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2679                                  &val_ctx);
2680       const auto input_tensor = context->tensors[node->inputs->data[0]];
2681       const auto input_type = input_tensor.type;
2682       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2683                            kTfLiteInt8);
2684       Expect(input_tensor.dims->size != 0,
2685              NNAPIValidationFailureType::kUnsupportedOutputType,
2686              "NNAPI does not support generating a scalar as output.", &val_ctx);
2687     } break;
2688     case kTfLiteBuiltinDepthToSpace: {
2689       const TfLiteType input_type =
2690           context->tensors[node->inputs->data[0]].type;
2691       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2692                            kTfLiteInt8);
2693     } break;
2694     case kTfLiteBuiltinReduceProd:
2695     case kTfLiteBuiltinSum: {
2696       ExpectOpVersion(version, 1, &val_ctx);
2697       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2698                                  &val_ctx);
2699       Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
2700              NNAPIValidationFailureType::kUnsupportedOutputType,
2701              "NNAPI does not support generating a scalar as output", &val_ctx);
2702       const auto input_type = context->tensors[node->inputs->data[0]].type;
2703       Expect(input_type == kTfLiteFloat32,
2704              NNAPIValidationFailureType::kUnsupportedInputType,
2705              "NNAPI only supports floating point input.", &val_ctx);
2706     } break;
2707     case kTfLiteBuiltinElu: {
2708       ExpectOpVersion(version, 1, &val_ctx);
2709       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2710                                  &val_ctx);
2711       const auto input_type = context->tensors[node->inputs->data[0]].type;
2712       Expect(input_type == kTfLiteFloat32,
2713              NNAPIValidationFailureType::kUnsupportedInputType,
2714              "NNAPI only supports floating point input.", &val_ctx);
2715     } break;
2716     case kTfLiteBuiltinFill: {
2717       ExpectOpVersion(version, 1, &val_ctx);
2718       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2719                                  &val_ctx);
2720       const auto& dims_tensor = context->tensors[node->inputs->data[0]];
2721       Expect(IsConstantTensor(&dims_tensor),
2722              NNAPIValidationFailureType::kUnsupportedInputType,
2723              "NNAPI doesn't support dynamic dimensions tensor.", &val_ctx);
2724       EXPECT_INPUT_TYPE_IN(dims_tensor.type, kTfLiteInt32, kTfLiteInt64);
2725       if (IsConstantTensor(&dims_tensor)) {
2726         Expect(dims_tensor.dims->data[0] != 0,
2727                NNAPIValidationFailureType::kUnsupportedOperandValue,
2728                "NNAPI doesn't support generating scalars from FILL", &val_ctx);
2729         if (dims_tensor.type == kTfLiteInt64) {
2730           bool fit_in_int32 =
2731               std::all_of(dims_tensor.data.i64,
2732                           dims_tensor.data.i64 + dims_tensor.dims->data[0],
2733                           [](int64_t dim) {
2734                             return std::numeric_limits<int32_t>::min() <= dim &&
2735                                    dim <= std::numeric_limits<int32_t>::max();
2736                           });
2737           Expect(fit_in_int32,
2738                  NNAPIValidationFailureType::kUnsupportedOperandValue,
2739                  "NNAPI only supports int32 dimensions tensor. If the "
2740                  "dimensions type is int64 and they are constant we can "
2741                  "convert them to int32 if the value isn't too large.",
2742                  &val_ctx);
2743         }
2744       }
2745       const auto& value_tensor = context->tensors[node->inputs->data[1]];
2746       EXPECT_INPUT_TYPE_IN(value_tensor.type, kTfLiteFloat32, kTfLiteInt32,
2747                            kTfLiteInt64);
2748       if (value_tensor.type == kTfLiteInt64) {
2749         Expect(
2750             IsConstantTensor(&value_tensor) &&
2751                 *value_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
2752                 *value_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
2753             NNAPIValidationFailureType::kUnsupportedInputType,
2754             "NNAPI only supports int32 input. If the input type is int64 and "
2755             "constant we can convert it to int32 if the value isn't too "
2756             "large.",
2757             &val_ctx);
2758       }
2759     } break;
2760     default:
2761       // All other operators are not mapped.
2762       AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator,
2763                            "Unsupported operation type.", &val_ctx);
2764   }
2765   return val_ctx.is_valid;
2766 }  // NOLINT(readability/fn_size)
2767 
2768 TfLiteStatus NNAPIDelegateKernel::Map(
2769     TfLiteContext* context, int builtin_code, int version,
2770     int android_sdk_version, const NNAPIOpMappingArgs& mapping_args,
2771     ANeuralNetworksOperationType* nn_op_type) {
2772   switch (builtin_code) {
2773     case kTfLiteBuiltinAdd: {
2774       auto builtin =
2775           reinterpret_cast<TfLiteAddParams*>(mapping_args.node->builtin_data);
2776       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2777       *nn_op_type = ANEURALNETWORKS_ADD;
2778     } break;
2779     case kTfLiteBuiltinArgMax: {
2780       *nn_op_type = ANEURALNETWORKS_ARGMAX;
2781     } break;
2782     case kTfLiteBuiltinArgMin: {
2783       *nn_op_type = ANEURALNETWORKS_ARGMIN;
2784     } break;
2785     case kTfLiteBuiltinMul: {
2786       auto builtin =
2787           reinterpret_cast<TfLiteMulParams*>(mapping_args.node->builtin_data);
2788       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2789       *nn_op_type = ANEURALNETWORKS_MUL;
2790     } break;
2791     case kTfLiteBuiltinAveragePool2d: {
2792       mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
2793       *nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
2794     } break;
2795     case kTfLiteBuiltinMaxPool2d: {
2796       mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
2797       *nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
2798     } break;
2799     case kTfLiteBuiltinL2Pool2d: {
2800       mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
2801       *nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
2802     } break;
2803     case kTfLiteBuiltinConv2d: {
2804       auto builtin =
2805           reinterpret_cast<TfLiteConvParams*>(mapping_args.node->builtin_data);
2806       mapping_args.builder->AddScalarInt32Operand(builtin->padding);
2807       mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
2808       mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
2809       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2810       // NNAPI supports dilated Conv2D since NNAPI 1.2.
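           // The optional layout flag has to be added before the dilation
           // factors, since the dilation operands follow the NCHW/NHWC layout
           // operand in the NNAPI 1.2 CONV_2D signature.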
2811       if (builtin->dilation_width_factor != 1 ||
2812           builtin->dilation_height_factor != 1) {
2813         mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
2814         mapping_args.builder->AddScalarInt32Operand(
2815             builtin->dilation_width_factor);
2816         mapping_args.builder->AddScalarInt32Operand(
2817             builtin->dilation_height_factor);
2818       }
2819       *nn_op_type = ANEURALNETWORKS_CONV_2D;
2820     } break;
2821     case kTfLiteBuiltinDepthwiseConv2d: {
2822       auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
2823           mapping_args.node->builtin_data);
2824       mapping_args.builder->AddScalarInt32Operand(builtin->padding);
2825       mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
2826       mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
2827       mapping_args.builder->AddScalarInt32Operand(builtin->depth_multiplier);
2828       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2829       if (builtin->dilation_width_factor != 1 ||
2830           builtin->dilation_height_factor != 1) {
2831         mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format.
2832         mapping_args.builder->AddScalarInt32Operand(
2833             builtin->dilation_width_factor);
2834         mapping_args.builder->AddScalarInt32Operand(
2835             builtin->dilation_height_factor);
2836       }
2837       *nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
2838     } break;
2839     case kTfLiteBuiltinFullyConnected: {
2840       auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
2841           mapping_args.node->builtin_data);
2842       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2843       *nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
2844     } break;
2845     case kTfLiteBuiltinHardSwish: {
2846       *nn_op_type = ANEURALNETWORKS_HARD_SWISH;
2847     } break;
2848     case kTfLiteBuiltinSoftmax: {
2849       auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(
2850           mapping_args.node->builtin_data);
2851       mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
2852       // The optional scalar specifying the dimension the activation is
2853       // performed on is not added; it defaults to -1.
2854       *nn_op_type = ANEURALNETWORKS_SOFTMAX;
2855     } break;
2856     case kTfLiteBuiltinReshape: {
2857       if (mapping_args.node->inputs->size == 1) {
2858         // If no new_shape tensor is given, construct the shape from params.
2859         auto* params = reinterpret_cast<TfLiteReshapeParams*>(
2860             mapping_args.node->builtin_data);
2861         int num_dimensions = params->num_dimensions;
2862         std::vector<int32_t> output_shape(num_dimensions);
2863         for (int i = 0; i < num_dimensions; ++i) {
2864           output_shape[i] = params->shape[i];
2865         }
2866         mapping_args.builder->AddVectorInt32Operand(
2867             output_shape.data(), static_cast<uint32_t>(num_dimensions));
2868       }
2869       *nn_op_type = ANEURALNETWORKS_RESHAPE;
2870     } break;
2871     case kTfLiteBuiltinResizeBilinear: {
2872       const int output_id = mapping_args.node->outputs->data[0];
2873       auto& output = mapping_args.context->tensors[output_id];
2874       const int output_height = output.dims->data[1];
2875       const int output_width = output.dims->data[2];
2876       mapping_args.builder->AddScalarInt32Operand(output_width);
2877       mapping_args.builder->AddScalarInt32Operand(output_height);
2878       auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(
2879           mapping_args.node->builtin_data);
2880       if (builtin->align_corners == true ||
2881           builtin->half_pixel_centers == true) {
2882         mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
2883         mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
2884         mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
2885       }
2886       *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
2887     } break;
2888     case kTfLiteBuiltinResizeNearestNeighbor: {
2889       const TfLiteTensor& new_shape =
2890           mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
2891       // NNAPI uses scalar inputs for height and width.
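           // new_shape holds [new_height, new_width], while NNAPI expects
           // output_width first and output_height second, hence the swapped
           // indices below.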
2892       mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]);
2893       mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]);
2894       mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
2895       auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
2896           mapping_args.node->builtin_data);
2897       if (builtin->align_corners == true ||
2898           builtin->half_pixel_centers == true) {
2899         mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
2900         mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
2901       }
2902       *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
2903     } break;
2904     case kTfLiteBuiltinSqueeze: {
2905       auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
2906           mapping_args.node->builtin_data);
2907       // Note that we add the squeeze dimensions even if the dimensions
2908       // were unspecified (empty), as NNAPI requires the operand.
2909       mapping_args.builder->AddVectorInt32Operand(
2910           builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr,
2911           static_cast<uint32_t>(builtin->num_squeeze_dims));
2912       *nn_op_type = ANEURALNETWORKS_SQUEEZE;
2913     } break;
2914     case kTfLiteBuiltinUnidirectionalSequenceLstm: {
2915       auto builtin = reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
2916           mapping_args.node->builtin_data);
2917       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2918       mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
2919       mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
2920       mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
2921       const bool hybrid_op = IsHybridOperator(
2922           mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm,
2923           mapping_args.node);
2924       if (mapping_args.node->inputs->size == 24) {
2925         // Add layer normalization tensors if they are provided.
2926         for (int i = 20; i < 24; ++i) {
2927           const int input_index = mapping_args.node->inputs->data[i];
2928           if (input_index != kTfLiteOptionalTensor) {
2929             mapping_args.builder->AddTensorInput(input_index, hybrid_op);
2930           } else {
2931             mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
2932           }
2933         }
2934       } else {
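             // The 20-input variant carries no layer normalization weights,
             // so add four empty operands as placeholders for the optional
             // inputs.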
2935         for (int i = 0; i < 4; ++i) {
2936           mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
2937         }
2938       }
2939 
2940       *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM;
2941     } break;
2942     case kTfLiteBuiltinL2Normalization: {
2943       *nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
2944     } break;
2945     case kTfLiteBuiltinLocalResponseNormalization: {
2946       auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
2947           mapping_args.node->builtin_data);
2948       mapping_args.builder->AddScalarInt32Operand(builtin->radius);
2949       mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
2950       mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
2951       mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
2952       *nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
2953     } break;
2954     case kTfLiteBuiltinLshProjection: {
2955       auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
2956           mapping_args.node->builtin_data);
2957       int type = builtin->type;
2958       // In Android Q+, NNAPI uses 3 to denote
2959       // kTfLiteLshProjectionSparse.
2960       const int kNNAPILshProjectionSparse = 3;
2961       if (builtin->type == kTfLiteLshProjectionSparse) {
2962         type = kNNAPILshProjectionSparse;
2963         // Add NNAPI null weight operand.
2964         mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
2965       }
2966       mapping_args.builder->AddScalarInt32Operand(type);
2967       *nn_op_type = ANEURALNETWORKS_LSH_PROJECTION;
2968     } break;
2969     case kTfLiteBuiltinConcatenation: {
2970       auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
2971           mapping_args.node->builtin_data);
2972       int axis = builtin->axis < 0
2973                      ? mapping_args.context
2974                                ->tensors[mapping_args.node->inputs->data[0]]
2975                                .dims->size +
2976                            builtin->axis
2977                      : builtin->axis;
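           // e.g. with a 4-D input, a TFLite axis of -1 normalizes to 3.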
2978       mapping_args.builder->AddScalarInt32Operand(axis);
2979       *nn_op_type = ANEURALNETWORKS_CONCATENATION;
2980     } break;
2981     case kTfLiteBuiltinDequantize: {
2982       *nn_op_type = ANEURALNETWORKS_DEQUANTIZE;
2983     } break;
2984     case kTfLiteBuiltinFloor: {
2985       *nn_op_type = ANEURALNETWORKS_FLOOR;
2986     } break;
2987     case kTfLiteBuiltinRelu: {
2988       *nn_op_type = ANEURALNETWORKS_RELU;
2989     } break;
2990     case kTfLiteBuiltinReluN1To1: {
2991       *nn_op_type = ANEURALNETWORKS_RELU1;
2992     } break;
2993     case kTfLiteBuiltinRelu6: {
2994       *nn_op_type = ANEURALNETWORKS_RELU6;
2995     } break;
2996     case kTfLiteBuiltinLogistic: {
2997       *nn_op_type = ANEURALNETWORKS_LOGISTIC;
2998     } break;
2999     case kTfLiteBuiltinTanh: {
3000       *nn_op_type = ANEURALNETWORKS_TANH;
3001     } break;
3002     case kTfLiteBuiltinSub: {
3003       auto builtin =
3004           reinterpret_cast<TfLiteSubParams*>(mapping_args.node->builtin_data);
3005       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3006       *nn_op_type = ANEURALNETWORKS_SUB;
3007     } break;
3008     case kTfLiteBuiltinDiv: {
3009       auto builtin =
3010           reinterpret_cast<TfLiteDivParams*>(mapping_args.node->builtin_data);
3011       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3012       *nn_op_type = ANEURALNETWORKS_DIV;
3013     } break;
3014     case kTfLiteBuiltinPad:
3015     case kTfLiteBuiltinPadv2: {
3016       // We want to map to PAD as much as possible since it is more widely
3017       // supported. We map to PadV2 only when we need to specify the
3018       // padding value.
3019       if (mapping_args.node->inputs->size == 2) {
3020         *nn_op_type = ANEURALNETWORKS_PAD;
3021       } else {
3022         const int constant_value_id = mapping_args.node->inputs->data[2];
3023         if (constant_value_id == kTfLiteOptionalTensor) {
3024           *nn_op_type = ANEURALNETWORKS_PAD;
3025         } else {
3026           *nn_op_type = ANEURALNETWORKS_PAD_V2;
3027         }
3028       }
3029     } break;
3030     case kTfLiteBuiltinUnidirectionalSequenceRnn: {
3031       auto builtin = reinterpret_cast<TfLiteSequenceRNNParams*>(
3032           mapping_args.node->builtin_data);
3033       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3034       mapping_args.builder->AddScalarInt32Operand(builtin->time_major);
3035       *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN;
3036     } break;
3037     case kTfLiteBuiltinSpaceToBatchNd: {
3038       *nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND;
3039     } break;
3040     case kTfLiteBuiltinBatchToSpaceNd: {
3041       *nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND;
3042     } break;
3043     case kTfLiteBuiltinStridedSlice: {
3044       auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
3045           mapping_args.node->builtin_data);
3046       mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask);
3047       mapping_args.builder->AddScalarInt32Operand(builtin->end_mask);
3048       mapping_args.builder->AddScalarInt32Operand(builtin->shrink_axis_mask);
3049       *nn_op_type = ANEURALNETWORKS_STRIDED_SLICE;
3050     } break;
3051     case kTfLiteBuiltinTranspose: {
3052       *nn_op_type = ANEURALNETWORKS_TRANSPOSE;
3053     } break;
3054     case kTfLiteBuiltinAbs: {
3055       *nn_op_type = ANEURALNETWORKS_ABS;
3056     } break;
3057     case kTfLiteBuiltinExp: {
3058       *nn_op_type = ANEURALNETWORKS_EXP;
3059     } break;
3060     case kTfLiteBuiltinLog: {
3061       *nn_op_type = ANEURALNETWORKS_LOG;
3062     } break;
3063     case kTfLiteBuiltinRsqrt: {
3064       *nn_op_type = ANEURALNETWORKS_RSQRT;
3065     } break;
3066     case kTfLiteBuiltinPow: {
3067       *nn_op_type = ANEURALNETWORKS_POW;
3068     } break;
3069     case kTfLiteBuiltinSlice: {
3070       *nn_op_type = ANEURALNETWORKS_SLICE;
3071     } break;
3072     case kTfLiteBuiltinSin: {
3073       *nn_op_type = ANEURALNETWORKS_SIN;
3074     } break;
3075     case kTfLiteBuiltinTransposeConv: {
3076       int input_tensor_flags = 0;
3077       const int input_tensor_id =
3078           mapping_args.node->inputs->data[/*kDataInputTensor*/ 2];
3079       const int weight_tensor_id =
3080           mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3081 
3082       // Transpose convolution doesn't have a hybrid variant.
3083       const bool hybrid_op = false;
3084 
3085       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3086         mapping_args.builder->AddTensorInput(
3087             input_tensor_id, hybrid_op,
3088             input_tensor_flags | NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED);
3089 
3090       } else {
3091         mapping_args.builder->AddTensorInput(
3092             input_tensor_id, hybrid_op,
3093             input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION);
3094       }
3095       // Transpose convolution uses per-channel quantization with int8 inputs
3096       // even if the number of channels in quantization parameters is equal to 1
3097       // (as opposed to conv2d, which uses per-tensor quantization in this
3098       // case).
3099       mapping_args.builder->AddTensorInput(
3100           weight_tensor_id, hybrid_op,
3101           input_tensor_flags | NN_TENSOR_FLAG_FORCE_PER_CHANNEL);
3102 
3103       // NNAPI requires a bias tensor, so we allocate a new tensor and fill
3104       // it with zeroes. It is deleted along with the other tensors in the
3105       // context when the subgraph destructor is called.
3106       int bias_index = -1;
3107       mapping_args.context->AddTensors(mapping_args.context, 1, &bias_index);
3108       TfLiteTensor* bias_tensor = &mapping_args.context->tensors[bias_index];
3109       const auto input_type =
3110           mapping_args.context
3111               ->tensors[mapping_args.node->inputs->data[/*kDataInputTensor*/ 2]]
3112               .type;
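           // The synthesized zero bias has to match the convolution type:
           // float32 bias for float inputs, int32 bias for quantized inputs.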
3113       if (input_type == kTfLiteFloat32) {
3114         bias_tensor->type = kTfLiteFloat32;
3115       } else {
3116         bias_tensor->type = kTfLiteInt32;
3117       }
3118 
3119       // Create an array with a required bias shape and resize the bias
3120       // tensor.
3121       TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1);
3122       const TfLiteTensor& output_shape =
3123           mapping_args.context->tensors[mapping_args.node->inputs
3124                                             ->data[/*kOutputShapeTensor*/ 0]];
3125       const int output_depth = output_shape.data.i32[3];
3126       bias_shape->data[0] = output_depth;
3127       bias_tensor->allocation_type = kTfLiteDynamic;
3128       mapping_args.context->ResizeTensor(mapping_args.context, bias_tensor,
3129                                          bias_shape);
3130 
3131       // Set the tensor's values to zeroes and add it using AddVector*, so
3132       // that the values are copied to NNAPI. We don't use the AddTensor
3133       // function because it doesn't copy values and the tensor we just
3134       // created is not in node->inputs.
3135       if (input_type == kTfLiteFloat32) {
3136         memset(bias_tensor->data.f, 0, output_depth * sizeof(float));
3137         mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f,
3138                                                       output_depth);
3139       } else {
3140         memset(bias_tensor->data.i32, 0, output_depth * sizeof(int));
3141         const TfLiteTensor& input_tensor =
3142             mapping_args.context->tensors[mapping_args.node->inputs
3143                                               ->data[/*kDataInputTensor*/ 2]];
3144         const TfLiteTensor& filter_tensor =
3145             mapping_args.context->tensors[mapping_args.node->inputs
3146                                               ->data[/*kWeightsTensor*/ 1]];
3147         // NNAPI requires the bias scale to be the product of the input
3148         // scale and the filter scale.
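             // e.g. an input scale of 0.5 and a filter scale of 0.02 yield a
             // bias scale of 0.01.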
3149         bias_tensor->params.scale =
3150             input_tensor.params.scale * filter_tensor.params.scale;
3151         mapping_args.builder->AddVectorInt32Operand(
3152             bias_tensor->data.i32, output_depth,
3153             input_tensor.params.scale * filter_tensor.params.scale,
3154             /*zero_point=*/0);
3155       }
3156 
3157       mapping_args.builder->AddTensorInput(
3158           mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0], hybrid_op);
3159 
3160       auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(
3161           mapping_args.node->builtin_data);
3162       mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3163       mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3164       mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3165       mapping_args.builder->AddScalarInt32Operand(
3166           /*ANEURALNETWORKS_FUSED_NONE*/ 0);
3167       // Use NHWC layout for input and output.
3168       mapping_args.builder->AddScalarBoolOperand(false);
3169       *nn_op_type = ANEURALNETWORKS_TRANSPOSE_CONV;
3170     } break;
3171     case kTfLiteBuiltinSqrt: {
3172       *nn_op_type = ANEURALNETWORKS_SQRT;
3173     } break;
3174     case kTfLiteBuiltinRnn: {
3175       // NNAPI needs both state_in and state_out.
3176       int ann_index;
3177       mapping_args.builder->AddStateFloat32Tensor(
3178           mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
3179           &ann_index);
3180       mapping_args.model_state_outputs->push_back(ann_index);
3181       mapping_args.model_state_tfl_inputs->push_back(
3182           mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
3183       auto builtin =
3184           reinterpret_cast<TfLiteRNNParams*>(mapping_args.node->builtin_data);
3185       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3186       *nn_op_type = ANEURALNETWORKS_RNN;
3187     } break;
3188     case kTfLiteBuiltinSpaceToDepth: {
3189       auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(
3190           mapping_args.node->builtin_data);
3191       mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
3192       *nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
3193     } break;
3194     case kTfLiteBuiltinSvdf: {
3195       // NNAPI needs both state_in and state_out.
3196       int ann_index;
3197       mapping_args.builder->AddStateFloat32Tensor(
3198           mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4],
3199           &ann_index);
3200       mapping_args.model_state_outputs->push_back(ann_index);
3201       mapping_args.model_state_tfl_inputs->push_back(
3202           mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4]);
3203 
3204       auto builtin =
3205           reinterpret_cast<TfLiteSVDFParams*>(mapping_args.node->builtin_data);
3206       mapping_args.builder->AddScalarInt32Operand(builtin->rank);
3207       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3208       *nn_op_type = ANEURALNETWORKS_SVDF;
3209     } break;
3210     case kTfLiteBuiltinLstm: {
3211       if (isLstmBasicKernel(mapping_args.node)) {
3212         const auto output_dims =
3213             mapping_args.context->tensors[mapping_args.node->outputs->data[1]]
3214                 .dims;
3215 
3216         // Inputs kInputData
3217         mapping_args.builder->AddTensorInput(
3218             mapping_args.node->inputs->data[0 /* kInputData */],
3219             /* hybrid_op */ false,
3220             /* scalar_as_tensor */ false);
3221 
3222         // The 8 weight tensors are set by decomposing the
3223         // kInputWeights param.
3224         const auto weight_tensor =
3225             mapping_args.context->tensors[mapping_args.node->inputs
3226                                               ->data[2 /* kInputWeights */]];
3227 
3228         std::vector<uint8_t> recurrent_to_input;
3229         std::vector<uint8_t> input_to_input;
3230         std::vector<uint8_t> recurrent_to_cell;
3231         std::vector<uint8_t> input_to_cell;
3232         std::vector<uint8_t> recurrent_to_forget;
3233         std::vector<uint8_t> input_to_forget;
3234         std::vector<uint8_t> recurrent_to_output;
3235         std::vector<uint8_t> input_to_output;
3236         tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor(
3237             weight_tensor.data.uint8, weight_tensor.dims, &recurrent_to_input,
3238             &input_to_input, &recurrent_to_cell, &input_to_cell,
3239             &recurrent_to_forget, &input_to_forget, &recurrent_to_output,
3240             &input_to_output);
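             // The submatrices are re-added below in the order expected by
             // QUANTIZED_16BIT_LSTM: input-to-{input, forget, cell, output}
             // followed by recurrent-to-{input, forget, cell, output}.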
3241 
3242         TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2);
3243         TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2);
3244         tflite::delegate::nnapi::SetWeightSubmatrixDims(
3245             weight_tensor.dims, recurrent_weight_dims, input_weight_dims);
3246 
3247         int new_tensor_index = -1;
3248 
3249         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3250             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3251             input_weight_dims, input_to_input, weight_tensor.params,
3252             &new_tensor_index);
3253 
3254         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3255             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3256             input_weight_dims, input_to_forget, weight_tensor.params,
3257             &new_tensor_index);
3258 
3259         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3260             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3261             input_weight_dims, input_to_cell, weight_tensor.params,
3262             &new_tensor_index);
3263 
3264         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3265             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3266             input_weight_dims, input_to_output, weight_tensor.params,
3267             &new_tensor_index);
3268 
3269         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3270             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3271             recurrent_weight_dims, recurrent_to_input, weight_tensor.params,
3272             &new_tensor_index);
3273 
3274         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3275             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3276             recurrent_weight_dims, recurrent_to_forget, weight_tensor.params,
3277             &new_tensor_index);
3278 
3279         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3280             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3281             recurrent_weight_dims, recurrent_to_cell, weight_tensor.params,
3282             &new_tensor_index);
3283 
3284         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3285             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3286             recurrent_weight_dims, recurrent_to_output, weight_tensor.params,
3287             &new_tensor_index);
3288 
3289         TfLiteIntArrayFree(input_weight_dims);
3290         TfLiteIntArrayFree(recurrent_weight_dims);
3291 
3292         // Biases have to be split into four.
3293         const auto bias_size = output_dims->data[1];
3294         const TfLiteTensor& biases_tensor =
3295             mapping_args.context->tensors[mapping_args.node->inputs
3296                                               ->data[3 /* kInputBiases */]];
3297 
3298         std::vector<int32_t> input_bias;
3299         std::vector<int32_t> cell_bias;
3300         std::vector<int32_t> forget_bias;
3301         std::vector<int32_t> output_bias;
3302         delegate::nnapi::DecomposeBiasTensor(biases_tensor.data.i32, bias_size,
3303                                              &input_bias, &cell_bias,
3304                                              &forget_bias, &output_bias);
3305 
3306         int input_bias_tensor = -1;
3307         mapping_args.builder->AddNewInputConstantTensor<int32_t>(
3308             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, input_bias,
3309             biases_tensor.params, &input_bias_tensor);
3310         int forget_bias_tensor = -1;
3311         mapping_args.builder->AddNewInputConstantTensor(
3312             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
3313             forget_bias, biases_tensor.params, &forget_bias_tensor);
3314         int cell_gate_bias_tensor = -1;
3315         mapping_args.builder->AddNewInputConstantTensor(
3316             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, cell_bias,
3317             biases_tensor.params, &cell_gate_bias_tensor);
3318         int output_gate_bias_tensor = -1;
3319         mapping_args.builder->AddNewInputConstantTensor(
3320             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
3321             output_bias, biases_tensor.params, &output_gate_bias_tensor);
3322 
3323         mapping_args.builder->AddTensorInput(
3324             mapping_args.node->inputs->data[4 /* kInputPrevState */],
3325             /* hybrid_op */ false,
3326             /* scalar_as_tensor */ false);
3327 
3328         // kInputPrevActivation
3329         mapping_args.builder->AddTensorInput(
3330             mapping_args.node->inputs->data[1 /* kInputPrevActivation */],
3331             /* hybrid_op */ false,
3332             /* scalar_as_tensor */ false);
3333 
3334         // Configure the copy from the activation and state outputs
3335         // to their associated inputs.
3336         mapping_args.feedback_loops->push_back(std::make_tuple(
3337             mapping_args.node->outputs->data[0 /*kOutputActivation*/],
3338             mapping_args.node->inputs->data[1 /*kInputPrevActivation*/]));
3339 
3340         mapping_args.feedback_loops->push_back(std::make_tuple(
3341             mapping_args.node->outputs->data[1 /*kOutputState*/],
3342             mapping_args.node->inputs->data[4 /*kInputPrevState*/]));
3343 
3344         // OUTPUTS
3345         // Set only the first two outputs, since the remaining ones are
3346         // ignored by NNAPI.
3347         mapping_args.builder->AddTensorOutput(
3348             mapping_args.node->outputs->data[1 /* kOutputState */], 0);
3349 
3350         mapping_args.builder->AddTensorOutput(
3351             mapping_args.node->outputs->data[0 /* kOutputActivation */], 0);
3352 
3353         *nn_op_type = ANEURALNETWORKS_QUANTIZED_16BIT_LSTM;
3354       } else {
3355         auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
3356             mapping_args.node->builtin_data);
3357         mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3358         mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3359         mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3360 
3361         // The current NNAPI implementation requires the scratch_buffer as
3362         // an output.
3363         mapping_args.builder->AddAdditionalFloat32OutputTensor(2);
3364 
3365         // NNAPI needs both state_in and state_out for cell_state and
3366         // output_state.
3367         int ann_index;
3368         mapping_args.builder->AddStateFloat32Tensor(
3369             mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 18],
3370             &ann_index);
3371         mapping_args.model_state_outputs->push_back(ann_index);
3372         mapping_args.model_state_tfl_inputs->push_back(
3373             mapping_args.node->inputs
3374                 ->data[/*kInputActivationStateTensor*/ 18]);
3375         mapping_args.builder->AddStateFloat32Tensor(
3376             mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
3377             &ann_index);
3378         mapping_args.model_state_outputs->push_back(ann_index);
3379         mapping_args.model_state_tfl_inputs->push_back(
3380             mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);
3381 
3382         const bool hybrid_op = IsHybridOperator(
3383             mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node);
3384 
3385         if (mapping_args.node->inputs->size == 24) {
3386           for (int i = 20; i < 24; ++i) {
3387             const auto input_index = mapping_args.node->inputs->data[i];
3388             if (input_index != kTfLiteOptionalTensor) {
3389               mapping_args.builder->AddTensorInput(input_index, hybrid_op);
3390             } else {
3391               mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3392             }
3393           }
3394         }
3395 
3396         *nn_op_type = ANEURALNETWORKS_LSTM;
3397       }
3398     } break;
3399     case kTfLiteBuiltinMean: {
3400       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3401           mapping_args.node->builtin_data);
3402       int32_t keep_dims = 0;
3403       if (builtin->keep_dims) keep_dims = 1;
3404       mapping_args.builder->AddScalarInt32Operand(keep_dims);
3405       *nn_op_type = ANEURALNETWORKS_MEAN;
3406     } break;
3407     case kTfLiteBuiltinEmbeddingLookup: {
3408       *nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
3409     } break;
3410     case kTfLiteBuiltinHashtableLookup: {
3411       *nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
3412     } break;
3413     case kTfLiteBuiltinMaximum: {
3414       *nn_op_type = ANEURALNETWORKS_MAXIMUM;
3415     } break;
3416     case kTfLiteBuiltinMinimum: {
3417       *nn_op_type = ANEURALNETWORKS_MINIMUM;
3418     } break;
3419     case kTfLiteBuiltinCast: {
3420       *nn_op_type = ANEURALNETWORKS_CAST;
3421     } break;
3422     case kTfLiteBuiltinLeakyRelu: {
3423       const auto input_type =
3424           mapping_args.context->tensors[mapping_args.node->inputs->data[0]]
3425               .type;
3426       auto builtin = reinterpret_cast<TfLiteLeakyReluParams*>(
3427           mapping_args.node->builtin_data);
3428 
3429       TfLiteTensor alpha_tensor;
3430       alpha_tensor.type = input_type;
3431       alpha_tensor.allocation_type = kTfLiteDynamic;
3432       alpha_tensor.dims = TfLiteIntArrayCreate(1);
3433       alpha_tensor.dims->data[0] = 1;
3434       alpha_tensor.params.zero_point = 0;
3435 
3436       int new_tensor_index = -1;
3437       if (input_type == kTfLiteFloat32) {
3438         alpha_tensor.params.scale = 0;
3439         std::vector<float> alpha_value = {builtin->alpha};
3440         mapping_args.builder->AddNewInputConstantTensor(
3441             ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, alpha_tensor.dims,
3442             alpha_value, alpha_tensor.params, &new_tensor_index);
3443       } else if (input_type == kTfLiteInt8 &&
3444                  android_sdk_version >= kMinSdkVersionForNNAPI13) {
3445         alpha_tensor.params.scale = builtin->alpha;
3446         std::vector<int8_t> alpha_value = {1};
3447         mapping_args.builder->AddNewInputConstantTensor(
3448             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
3449             alpha_tensor.dims, alpha_value, alpha_tensor.params,
3450             &new_tensor_index);
3451       } else {
3452         alpha_tensor.params.scale = builtin->alpha;
3453         std::vector<uint8_t> alpha_value = {1};
3454         mapping_args.builder->AddNewInputConstantTensor(
3455             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3456             alpha_tensor.dims, alpha_value, alpha_tensor.params,
3457             &new_tensor_index);
3458       }
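           // For the quantized paths the alpha value is encoded entirely in the
           // operand's quantization: the constant tensor holds the value 1 with
           // zero point 0 and its scale set to builtin->alpha, so dequantizing
           // the operand yields alpha.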
3459 
3460       *nn_op_type = ANEURALNETWORKS_PRELU;
3461     } break;
3462     case kTfLiteBuiltinPrelu: {
3463       *nn_op_type = ANEURALNETWORKS_PRELU;
3464     } break;
3465     case kTfLiteBuiltinTile: {
3466       *nn_op_type = ANEURALNETWORKS_TILE;
3467     } break;
3468     case kTfLiteBuiltinLogicalOr: {
3469       *nn_op_type = ANEURALNETWORKS_LOGICAL_OR;
3470     } break;
3471     case kTfLiteBuiltinLogicalAnd: {
3472       *nn_op_type = ANEURALNETWORKS_LOGICAL_AND;
3473     } break;
3474     case kTfLiteBuiltinLogicalNot: {
3475       *nn_op_type = ANEURALNETWORKS_LOGICAL_NOT;
3476     } break;
3477     case kTfLiteBuiltinLess: {
3478       *nn_op_type = ANEURALNETWORKS_LESS;
3479     } break;
3480     case kTfLiteBuiltinLessEqual: {
3481       *nn_op_type = ANEURALNETWORKS_LESS_EQUAL;
3482     } break;
3483     case kTfLiteBuiltinGreater: {
3484       *nn_op_type = ANEURALNETWORKS_GREATER;
3485     } break;
3486     case kTfLiteBuiltinGreaterEqual: {
3487       *nn_op_type = ANEURALNETWORKS_GREATER_EQUAL;
3488     } break;
3489     case kTfLiteBuiltinEqual: {
3490       *nn_op_type = ANEURALNETWORKS_EQUAL;
3491     } break;
3492     case kTfLiteBuiltinNotEqual: {
3493       *nn_op_type = ANEURALNETWORKS_NOT_EQUAL;
3494     } break;
3495     case kTfLiteBuiltinNeg: {
3496       *nn_op_type = ANEURALNETWORKS_NEG;
3497     } break;
3498     case kTfLiteBuiltinTopkV2: {
3499       const TfLiteTensor& k_param =
3500           mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3501       mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32);
3502       *nn_op_type = ANEURALNETWORKS_TOPK_V2;
3503     } break;
3504     case kTfLiteBuiltinSelect: {
3505       *nn_op_type = ANEURALNETWORKS_SELECT;
3506     } break;
3507     case kTfLiteBuiltinGather: {
3508       auto builtin = reinterpret_cast<TfLiteGatherParams*>(
3509           mapping_args.node->builtin_data);
3510       mapping_args.builder->AddScalarInt32Operand(builtin->axis);
3511       mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
3512                                            /* hybrid_op */ false,
3513                                            /* tensor_flags */ 0);
3514       *nn_op_type = ANEURALNETWORKS_GATHER;
3515     } break;
3516     case kTfLiteBuiltinBidirectionalSequenceLstm: {
3517       auto builtin = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
3518           mapping_args.node->builtin_data);
3519       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3520       mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3521       mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3522       mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs);
3523       mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
3524       // TF Lite doesn't support layer normalization in bidirectional
3525       // sequence LSTM, so we insert optional tensors for NNAPI.
3526       for (int i = 0; i < 8; ++i) {
3527         mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3528       }
3529       *nn_op_type = ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM;
3530     } break;
3531     case kTfLiteBuiltinExpandDims: {
3532       const TfLiteTensor& axis_param =
3533           mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3534       mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32);
3535       *nn_op_type = ANEURALNETWORKS_EXPAND_DIMS;
3536     } break;
3537     case kTfLiteBuiltinSplit: {
3538       const TfLiteTensor& axis =
3539           mapping_args.context->tensors[mapping_args.node->inputs->data[0]];
3540       auto builtin =
3541           reinterpret_cast<TfLiteSplitParams*>(mapping_args.node->builtin_data);
3542       mapping_args.builder->AddScalarInt32Operand(*axis.data.i32);
3543       mapping_args.builder->AddScalarInt32Operand(builtin->num_splits);
3544       *nn_op_type = ANEURALNETWORKS_SPLIT;
3545     } break;
3546     case kTfLiteBuiltinLogSoftmax: {
3547       // Scaling and axis are hardcoded in TFLite to 1 and -1,
3548       // respectively.
3549       mapping_args.builder->AddScalarFloat32Operand(1);
3550       mapping_args.builder->AddScalarInt32Operand(-1);
3551       *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
3552     } break;
3553     case kTfLiteBuiltinQuantize: {
3554       auto input_index = mapping_args.node->inputs->data[0];
3555       // NNAPI only supports quantization from float, not requantization.
3556       // If the input is already quantized, dequantize it by adding a
3557       // Dequantize node before this one.
3558       if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
3559         mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
3560                                             mapping_args.node_index);
3561       }
3562 
3563       *nn_op_type = ANEURALNETWORKS_QUANTIZE;
3564     } break;
3565     case kTfLiteBuiltinReduceAny: {
3566       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3567           mapping_args.node->builtin_data);
3568       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3569       *nn_op_type = ANEURALNETWORKS_REDUCE_ANY;
3570     } break;
3571     case kTfLiteBuiltinReduceMin: {
3572       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3573           mapping_args.node->builtin_data);
3574       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3575       *nn_op_type = ANEURALNETWORKS_REDUCE_MIN;
3576     } break;
3577     case kTfLiteBuiltinReduceMax: {
3578       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3579           mapping_args.node->builtin_data);
3580       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3581       *nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
3582     } break;
3583     case kTfLiteBuiltinDepthToSpace: {
3584       auto builtin = reinterpret_cast<TfLiteDepthToSpaceParams*>(
3585           mapping_args.node->builtin_data);
3586       mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
3587       *nn_op_type = ANEURALNETWORKS_DEPTH_TO_SPACE;
3588     } break;
3589     case kTfLiteBuiltinReduceProd: {
3590       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3591           mapping_args.node->builtin_data);
3592       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3593       *nn_op_type = ANEURALNETWORKS_REDUCE_PROD;
3594     } break;
3595     case kTfLiteBuiltinSum: {
3596       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3597           mapping_args.node->builtin_data);
3598       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3599       *nn_op_type = ANEURALNETWORKS_REDUCE_SUM;
3600     } break;
3601     case kTfLiteBuiltinElu: {
3602       mapping_args.builder->AddScalarFloat32Operand(1.0);
3603       *nn_op_type = ANEURALNETWORKS_ELU;
3604     } break;
3605     case kTfLiteBuiltinFill: {
3606       *nn_op_type = ANEURALNETWORKS_FILL;
3607     } break;
3608     default:
3609       // All other operators are not mapped.
3610       return kTfLiteError;
3611   }
3612   return kTfLiteOk;
3613 }
3614 
3615 // Initialize the kernel (a NN model).
Init(TfLiteContext * context,const TfLiteDelegateParams * params,int * nnapi_errno)3616 TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
3617                                        const TfLiteDelegateParams* params,
3618                                        int* nnapi_errno) {
3619   for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
3620     nodes_.push_back(node_index);
3621   }
3622 
3623   const auto delegate_options =
3624       StatefulNnApiDelegate::GetOptions(params->delegate);
3625   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
3626       ShouldUseTargetDevices(delegate_options, nnapi_)) {
3627     TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
3628                                            nnapi_errno, &nnapi_devices_));
3629 
3630     if (nnapi_devices_.empty()) {
3631       context->ReportError(
3632           context, "NNAPI delegate requested but no accelerators available.");
3633       return kTfLiteError;
3634     }
3635   }
3636 
3637   // Mark the handle backed tensors.
3638   tensor_memory_map_ =
3639       &StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate);
3640 
3641   if (!nn_model_) {
3642     ANeuralNetworksModel* model = nullptr;
3643     RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3644                                     nnapi_->ANeuralNetworksModel_create(&model),
3645                                     "creating NNAPI model", nnapi_errno);
3646     nn_model_.reset(model);
3647 
3648     TF_LITE_ENSURE_STATUS(BuildGraph(context, delegate_options,
3649                                      params->input_tensors,
3650                                      params->output_tensors, nnapi_errno));
3651   }
3652 
3653   // Calculate the model compilation cache token here, since its value
3654   // depends on some of the TfLiteDelegateParams.
3655   nn_compilation_cache_token_.clear();
3656   const char* cache_dir = delegate_options.cache_dir;
3657   const char* model_token = delegate_options.model_token;
3658   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
3659       model_token) {
3660     // Compilation caching may be enabled; try to construct the uint8
3661     // token.
3662     // TODO(b/133342794): use a generic token generator class.
3663     uint64_t token_parts[4];
3664     // Create bits from model_token.
3665     // Using farmhash fingerprint instead of std::hash, as the latter is not
3666     // guaranteed to be stable across program invocations.
3667     token_parts[0] =
3668         farmhash::Fingerprint64(model_token, std::strlen(model_token));
3669     // Create bits from params->nodes_to_replace.
3670     token_parts[1] = GetHash(params->nodes_to_replace);
3671     // Create bits from params->input_tensors. These include the input tensor
3672     // sizes, as the cached compilations are size-dependent.
3673     token_parts[2] = GetHash(params->input_tensors);
3674     for (int i : TfLiteIntArrayView(params->input_tensors)) {
3675       if (i != kTfLiteOptionalTensor) {
3676         TfLiteTensor* t = &context->tensors[i];
3677         TF_LITE_ENSURE(context, t->dims);
3678         token_parts[2] = GetHash(t->dims, token_parts[2]);
3679       }
3680     }
3681     // Create bits from params->output_tensors.
3682     token_parts[3] = GetHash(params->output_tensors);
3683     // NNAPI requires the token to be 256 bits long.
3684     // TODO(b/172238515): get token size from header instead of
3685     // hardcoding.
3686     std::vector<uint8_t> nnapi_cache_token(32, 0);
3687     // Copy the token bits.
3688     uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
3689     for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
3690       nnapi_cache_token[i] = p[i];
3691     }
3692 
3693     nn_compilation_cache_token_ = nnapi_cache_token;
3694   }
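       // A minimal caller sketch for enabling compilation caching (hypothetical
       // values; cache_dir and model_token are the Options fields read above):
       //
       //   StatefulNnApiDelegate::Options options;
       //   options.cache_dir = "/data/local/tmp/nnapi_cache";  // assumed path
       //   options.model_token = "my_model_v1";                // assumed token
       //   StatefulNnApiDelegate delegate(options);
       //
       // Both fields must be set, and the device must support NNAPI 1.2
       // (Android 10) or later, for the token above to be computed.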
3695 
3696   initialised_ = true;
3697 
3698   return kTfLiteOk;
3699 }
3700 
Prepare(TfLiteContext * context,TfLiteNode * node,int * nnapi_errno)3701 TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
3702                                           TfLiteNode* node, int* nnapi_errno) {
3703   if (!initialised_) {
3704     return kTfLiteError;
3705   }
3706 
3707   const auto delegate_options =
3708       StatefulNnApiDelegate::GetOptions(node->delegate);
3709   if (nn_compilation_) {
3710     return kTfLiteOk;
3711   }
3712 
3713   ANeuralNetworksCompilation* compilation = nullptr;
3714   if (!nnapi_devices_.empty()) {
3715     // Compile for the selected accelerator.
3716     RETURN_TFLITE_ERROR_IF_NN_ERROR(
3717         context,
3718         nnapi_->ANeuralNetworksCompilation_createForDevices(
3719             nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
3720             &compilation),
3721         "creating NNAPI model for given devices", nnapi_errno);
3722   } else {
3723     RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3724                                     nnapi_->ANeuralNetworksCompilation_create(
3725                                         nn_model_.get(), &compilation),
3726                                     "creating NNAPI compilation", nnapi_errno);
3727   }
3728 
3729   auto preference = delegate_options.execution_preference;
3730   if (preference !=
3731       StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
3732     const int preference_result =
3733         nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
3734                                                          preference);
3735     if (preference_result != ANEURALNETWORKS_NO_ERROR) {
3736       nnapi_->ANeuralNetworksCompilation_free(compilation);
3737       compilation = nullptr;
3738     }
3739     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
3740                                     "setting compilation preferences",
3741                                     nnapi_errno);
3742   }
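       // The preference values map onto the NNAPI execution preferences
       // (low power / fast single answer / sustained speed); kUndefined is
       // skipped above so the NNAPI default preference is kept.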
3743 
3744   if (!nn_compilation_cache_token_.empty()) {
3745     const char* cache_dir = delegate_options.cache_dir;
3746     const int set_caching_result =
3747         nnapi_->ANeuralNetworksCompilation_setCaching(
3748             compilation, cache_dir, nn_compilation_cache_token_.data());
3749     if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
3750       nnapi_->ANeuralNetworksCompilation_free(compilation);
3751       compilation = nullptr;
3752     }
3753     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
3754                                     "configuring NNAPI caching", nnapi_errno);
3755   }
3756   // Set compilation timeout and priority if supported (NNAPI 1.3+).
3757   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
3758     if (delegate_options.max_compilation_timeout_duration_ns > 0) {
3759       RETURN_TFLITE_ERROR_IF_NN_ERROR(
3760           context,
3761           nnapi_->ANeuralNetworksCompilation_setTimeout(
3762               compilation,
3763               delegate_options.max_compilation_timeout_duration_ns),
3764           "setting compilation timeout", nnapi_errno);
3765     }
3766     RETURN_TFLITE_ERROR_IF_NN_ERROR(
3767         context,
3768         nnapi_->ANeuralNetworksCompilation_setPriority(
3769             compilation, delegate_options.execution_priority),
3770         "setting compilation priority", nnapi_errno);
3771   }
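       // Illustrative caller sketch for the NNAPI 1.3 controls used above
       // (hypothetical values):
       //
       //   StatefulNnApiDelegate::Options options;
       //   options.max_compilation_timeout_duration_ns = 100 * 1000 * 1000;
       //   options.execution_priority = ANEURALNETWORKS_PRIORITY_HIGH;
       //   StatefulNnApiDelegate delegate(options);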
3772   const int finish_result =
3773       nnapi_->ANeuralNetworksCompilation_finish(compilation);
3774   if (finish_result != ANEURALNETWORKS_NO_ERROR) {
3775     nnapi_->ANeuralNetworksCompilation_free(compilation);
3776     compilation = nullptr;
3777   }
3778   RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
3779                                   "completing NNAPI compilation", nnapi_errno);
3780   nn_compilation_.reset(compilation);
3781 
3782   return kTfLiteOk;
3783 }
3784 
GetOperationsSupportedByTargetNnApiDevices(TfLiteContext * context,std::vector<int> * supported_nodes,int * nnapi_errno)3785 TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices(
3786     TfLiteContext* context, std::vector<int>* supported_nodes,
3787     int* nnapi_errno) {
3788   if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) {
3789     return kTfLiteError;
3790   }
3791 
3792   const auto nnapi_model_size = nnapi_to_tflite_op_mapping_.size();
3793 
3794   // Determine which operations the target devices actually support.
3795   std::unique_ptr<bool[]> nnapi_ops_support_flags(new bool[nnapi_model_size]);
3796 
3797   RETURN_TFLITE_ERROR_IF_NN_ERROR(
3798       context,
3799       nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices(
3800           nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
3801           nnapi_ops_support_flags.get()),
3802       "Checking supported operations for devices", nnapi_errno);
3803 
3804   // A TfLite op is supported only if all the associated NNAPI ones are.
3805   auto tflite_ops_support_status = std::map<int, bool>();
3806   std::for_each(nodes_.begin(), nodes_.end(),
3807                 [&tflite_ops_support_status](int tflite_node_index) {
3808                   tflite_ops_support_status[tflite_node_index] = true;
3809                 });
3810   for (int nnapi_op_index = 0; nnapi_op_index < nnapi_model_size;
3811        nnapi_op_index++) {
3812     const auto tflite_op_index = nnapi_to_tflite_op_mapping_[nnapi_op_index];
3813     tflite_ops_support_status[tflite_op_index] &=
3814         nnapi_ops_support_flags[nnapi_op_index];
3815   }
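       // Example: if a single TfLite op was lowered into several NNAPI ops
       // (e.g. an extra Dequantize, or h_swish decomposed on older SDKs), the
       // TfLite node is reported as supported only when the device supports
       // every one of the generated NNAPI operations.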
3816 
3817   supported_nodes->clear();
3818   std::for_each(nodes_.begin(), nodes_.end(),
3819                 [&supported_nodes, &tflite_ops_support_status](int node_index) {
3820                   if (tflite_ops_support_status[node_index]) {
3821                     supported_nodes->push_back(node_index);
3822                   }
3823                 });
3824 
3825   return kTfLiteOk;
3826 }
3827 
Invoke(TfLiteContext * context,TfLiteNode * node,int * nnapi_errno)3828 TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
3829                                          TfLiteNode* node, int* nnapi_errno) {
3830   ANeuralNetworksExecution* execution = nullptr;
3831   RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3832                                   nnapi_->ANeuralNetworksExecution_create(
3833                                       nn_compilation_.get(), &execution),
3834                                   "creating NNAPI execution", nnapi_errno);
3835   std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
3836       execution_unique_ptr(execution, NNFreeExecution(nnapi_));
3837 
3838   // Set execution timeouts if applicable.
3839   const auto delegate_options =
3840       StatefulNnApiDelegate::GetOptions(node->delegate);
3841   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
3842     if (delegate_options.max_execution_timeout_duration_ns > 0) {
3843       RETURN_TFLITE_ERROR_IF_NN_ERROR(
3844           context,
3845           nnapi_->ANeuralNetworksExecution_setTimeout(
3846               execution, delegate_options.max_execution_timeout_duration_ns),
3847           "setting execution timeout", nnapi_errno);
3848     }
3849     if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
3850       RETURN_TFLITE_ERROR_IF_NN_ERROR(
3851           context,
3852           nnapi_->ANeuralNetworksExecution_setLoopTimeout(
3853               execution,
3854               delegate_options.max_execution_loop_timeout_duration_ns),
3855           "setting execution loop timeout", nnapi_errno);
3856     }
3857   }
3858   // Check if the input and output memory pools need to be resized.
3859   if (delegate_options.allow_dynamic_dimensions) {
3860     size_t total_input_byte_size = 0;
3861     // Compute the byte size of the NNAPI-mapped TensorFlow Lite inputs.
3862     for (int i : TfLiteIntArrayView(node->inputs)) {
3863       // Constant tensors are not NNAPI inputs.
3864       if (i != kTfLiteOptionalTensor &&
3865           context->tensors[i].allocation_type != kTfLiteMmapRo &&
3866           // The delegate might not have mapped this input (this can
3867           // happen if one tensor is split into several ones).
3868           operand_mapping_.lite_index_to_ann(i) != -1) {
3869         if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
3870           continue;
3871         }
3872         const TfLiteType nn_type_conversion =
3873             operand_mapping_.lite_index_to_ann_type_conversion(i);
3874         int tensor_size = 0;
3875         if (nn_type_conversion == kTfLiteNoType) {
3876           tensor_size = context->tensors[i].bytes;
3877         } else {
3878           size_t type_size;
3879           TF_LITE_ENSURE_OK(
3880               context, GetSizeOfType(context, nn_type_conversion, &type_size));
3881           tensor_size = NumElements(&context->tensors[i]) * type_size;
3882         }
3883         total_input_byte_size += tensor_size;
3884         total_input_byte_size += getNumPaddingBytes(tensor_size);
3885       }
3886     }
3887     if (total_input_byte_size > nn_input_memory_->get_byte_size()) {
3888       nn_input_memory_.reset(
3889           new NNMemory(nnapi_, "input_pool", total_input_byte_size));
3890     }
3891 
3892     size_t total_output_byte_size = 0;
3893     for (int i : TfLiteIntArrayView(node->outputs)) {
3894       if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
3895         continue;
3896       }
3897       total_output_byte_size += context->tensors[i].bytes;
3898       total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
3899     }
3900     if (total_output_byte_size > nn_output_memory_->get_byte_size()) {
3901       nn_output_memory_.reset(
3902           new NNMemory(nnapi_, "output_pool", total_output_byte_size));
3903     }
3904   }
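       // Note that each tensor's region in the shared pools is followed by
       // alignment padding (getNumPaddingBytes); the per-tensor offsets
       // computed below add the same padding, so the two walks stay in sync.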
3905 
3906   // Set the input tensor buffers. Note: we access TfLite tensors using
3907   // absolute indices, but the NN API indexes inputs by relative indices.
3908   int relative_input_index = 0;
3909 
3910   const bool use_int8_asymm_signed =
3911       target_sdk_version_ >= kMinSdkVersionForNNAPI13;
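       // From NNAPI 1.3 (Android 11) onwards, int8 data can be passed as
       // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, so no +128 zero-point
       // shift is needed; older versions fall back to the unsigned
       // asymmetric representation in the conversions below.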
3912 
3913   size_t input_offset = 0;
3914   for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
3915     if (absolute_input_index == kTfLiteOptionalTensor) {
3916       continue;
3917     }
3918     ANeuralNetworksOperandType input_nn_operand_type;
3919     ANeuralNetworksOperandType* input_nn_operand_type_ptr = nullptr;
3920     TfLiteTensor* tensor = &context->tensors[absolute_input_index];
3921     TfLiteType ann_type_equivalent =
3922         operand_mapping_.lite_index_to_ann_type_conversion(
3923             absolute_input_index);
3924     if (delegate_options.allow_dynamic_dimensions &&
3925         HasUnspecifiedDimension(tensor)) {
3926       input_nn_operand_type =
3927           ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
3928       input_nn_operand_type_ptr = &input_nn_operand_type;
3929     }
3930     if (tensor->allocation_type != kTfLiteMmapRo) {
3931       if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
3932           tensor->buffer_handle < tensor_memory_map_->size()) {
3933         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
3934             context,
3935             nnapi_->ANeuralNetworksExecution_setInputFromMemory(
3936                 execution, relative_input_index, input_nn_operand_type_ptr,
3937                 tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
3938                 tensor->bytes),
3939             "associating NNAPI execution input with a memory object", tensor,
3940             nnapi_errno);
3941         relative_input_index++;
3942         continue;
3943       }
3944       int tensor_size = 0;
3945       if (ann_type_equivalent != kTfLiteNoType) {
3946         const auto num_elements = NumElements(tensor);
3947         uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
3948         if (tensor->type == kTfLiteUInt8 &&
3949             ann_type_equivalent == kTfLiteInt32) {
3950           for (int i = 0; i < num_elements; ++i) {
3951             reinterpret_cast<int32_t*>(input_ptr)[i] =
3952                 static_cast<const int32_t>(tensor->data.uint8[i]);
3953           }
3954         } else if (tensor->type == kTfLiteInt8 &&
3955                    ann_type_equivalent == kTfLiteUInt8) {
3956           // Explicitly convert int8 values to uint8 values.
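               // For example, an int8 value of -128 becomes uint8 0 and +127
               // becomes 255; the operand's zero point was shifted by +128
               // when it was added to the NNAPI model.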
3957           for (int i = 0; i < num_elements; ++i) {
3958             input_ptr[i] = static_cast<const uint8_t>(
3959                 static_cast<int32_t>(tensor->data.int8[i]) + 128);
3960           }
3961         } else if (tensor->type == kTfLiteInt8 &&
3962                    ann_type_equivalent == kTfLiteInt32) {
3963           if (use_int8_asymm_signed) {
3964             for (int i = 0; i < num_elements; ++i) {
3965               reinterpret_cast<int32_t*>(input_ptr)[i] =
3966                   static_cast<const int32_t>(tensor->data.int8[i]);
3967             }
3968           } else {
3969             for (int i = 0; i < num_elements; ++i) {
3970               reinterpret_cast<int32_t*>(input_ptr)[i] =
3971                   static_cast<const int32_t>(tensor->data.int8[i]) + 128;
3972             }
3973           }
3974         } else {
3975           context->ReportError(
3976               context,
3977               "NN API Delegate: unsupported tensor types conversion: "
3978               "from type code %d to type code %d.\n",
3979               tensor->type, ann_type_equivalent);
3980           return kTfLiteError;
3981         }
3982         size_t type_size;
3983         TF_LITE_ENSURE_OK(
3984             context, GetSizeOfType(context, ann_type_equivalent, &type_size));
3985         tensor_size = NumElements(tensor) * type_size;
3986         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
3987             context,
3988             nnapi_->ANeuralNetworksExecution_setInputFromMemory(
3989                 execution, relative_input_index, input_nn_operand_type_ptr,
3990                 nn_input_memory_->get_handle(), input_offset, tensor_size),
3991             "associating NNAPI execution input with a memory object", tensor,
3992             nnapi_errno);
3993       } else {
3994         // Copy data to pre-allocated shared memory.
3995         memcpy(nn_input_memory_->get_data_ptr() + input_offset,
3996                tensor->data.raw, tensor->bytes);
3997         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
3998             context,
3999             nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4000                 execution, relative_input_index, input_nn_operand_type_ptr,
4001                 nn_input_memory_->get_handle(), input_offset, tensor->bytes),
4002             "associating NNAPI execution input with a memory object", tensor,
4003             nnapi_errno);
4004         tensor_size = tensor->bytes;
4005       }
4006       input_offset += tensor_size;
4007       input_offset += getNumPaddingBytes(tensor_size);
4008       relative_input_index++;
4009     }
4010   }
4011 
4012   // Set the output tensor buffers.
4013   int relative_output_index = 0;
4014   size_t output_offset = 0;
4015   for (auto output_index : TfLiteIntArrayView(node->outputs)) {
4016     // If the NNAPI implementation doesn't produce some of the outputs,
4017     // they are left unmapped and we should not read their values here.
4018     if (operand_mapping_.lite_index_to_ann(output_index) == -1) {
4019       continue;
4020     }
4021     ANeuralNetworksOperandType output_nn_operand_type;
4022     ANeuralNetworksOperandType* output_nn_operand_type_ptr = nullptr;
4023     TfLiteTensor* tensor = &context->tensors[output_index];
4024     if (delegate_options.allow_dynamic_dimensions &&
4025         HasUnspecifiedDimension(tensor)) {
4026       TfLiteType ann_type_equivalent =
4027           operand_mapping_.lite_index_to_ann_type_conversion(output_index);
4028       output_nn_operand_type =
4029           ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
4030       output_nn_operand_type_ptr = &output_nn_operand_type;
4031     }
4032     if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
4033         tensor->buffer_handle < tensor_memory_map_->size()) {
4034       RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4035           context,
4036           nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4037               execution, relative_output_index, output_nn_operand_type_ptr,
4038               tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
4039               tensor->bytes),
4040           "associating NNAPI execution output to a memory object", tensor,
4041           nnapi_errno);
4042 
4043     } else {
4044       RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4045           context,
4046           nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4047               execution, relative_output_index, output_nn_operand_type_ptr,
4048               nn_output_memory_->get_handle(), output_offset, tensor->bytes),
4049           "associating NNAPI execution output to a memory object", tensor,
4050           nnapi_errno);
4051       output_offset += tensor->bytes;
4052       output_offset += getNumPaddingBytes(tensor->bytes);
4053     }
4054     relative_output_index++;
4055   }
4056 
4057   // The state_out of the previous invocation needs to be mapped to the
4058   // state_in of the current invocation.
4059   for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
4060     int state_tensor_idx = model_state_tfl_inputs_[i];
4061     TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
4062     // Here we use a deep copy for the state_in tensors so that we are not
4063     // reading and writing into the same buffer during an invocation.
4064     // TODO(b/110369471): use a double shared buffer to minimize the copies.
4065     RETURN_TFLITE_ERROR_IF_NN_ERROR(
4066         context,
4067         nnapi_->ANeuralNetworksExecution_setOutput(
4068             execution, relative_output_index, nullptr, tensor->data.raw,
4069             tensor->bytes),
4070         "associating NNAPI execution output to a buffer", nnapi_errno);
4071     relative_output_index++;
4072   }
4073   // Invoke ANN in blocking fashion.
4074   if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
4075     ANeuralNetworksEvent* event = nullptr;
4076     RETURN_TFLITE_ERROR_IF_NN_ERROR(
4077         context,
4078         nnapi_->ANeuralNetworksExecution_startCompute(execution, &event),
4079         "starting async computation", nnapi_errno);
4080     const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
4081     nnapi_->ANeuralNetworksEvent_free(event);
4082     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result,
4083                                     "waiting for async computation completion",
4084                                     nnapi_errno);
4085   } else {
4086     // Use synchronous execution for NNAPI 1.2+.
4087     RETURN_TFLITE_ERROR_IF_NN_ERROR(
4088         context, nnapi_->ANeuralNetworksExecution_compute(execution),
4089         "running computation", nnapi_errno);
4090   }
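       // ANeuralNetworksExecution_compute (available from NNAPI 1.2) runs
       // synchronously and avoids the event allocation and wait round-trip
       // used by the startCompute/ANeuralNetworksEvent_wait pair above.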
4091 
4092   // Copy results from shared memory to the destination.
4093   output_offset = 0;
4094   for (auto output_index : TfLiteIntArrayView(node->outputs)) {
4095     TfLiteTensor* tensor = &context->tensors[output_index];
4096     if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
4097       continue;
4098     }
4099     TfLiteType ann_type_equivalent =
4100         operand_mapping_.lite_index_to_ann_type_conversion(output_index);
4101     if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
4102       // Explicitly convert uint8 values to int8 values.
4103       uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
4104           nn_output_memory_->get_data_ptr() + output_offset);
4105       const auto num_elements = NumElements(tensor);
4106       for (int i = 0; i < num_elements; ++i) {
4107         output_ptr[i] =
4108             static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
4109       }
4110     }
4111     memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
4112            tensor->bytes);
4113     output_offset += tensor->bytes;
4114     output_offset += getNumPaddingBytes(tensor->bytes);
4115   }
4116 
4117   // Copy the output of every output tensor in feedback_loops_ into its
4118   // associated input.
4119   for (auto feedback_loop : feedback_loops_) {
4120     int output_tensor_idx;
4121     int input_tensor_idx;
4122     std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
4123     TfLiteTensor& src = context->tensors[output_tensor_idx];
4124     TfLiteTensor& dest = context->tensors[input_tensor_idx];
4125 
4126     memcpy(dest.data.raw, src.data.raw, src.bytes);
4127   }
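       // For the basic quantized LSTM kernel mapped earlier, this copies
       // kOutputActivation back into kInputPrevActivation and kOutputState
       // back into kInputPrevState, so the next invocation starts from the
       // updated state.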
4128 
4129   return kTfLiteOk;
4130 }
4131 
AddDequantizeOperatorsWhereNeeded(const TfLiteContext * context,int builtin_code,const TfLiteNode * node,int tflite_node_index,NNAPIOpBuilder * builder,int * nnapi_errno)4132 void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded(
4133     const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
4134     int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno) {
4135   // Depending on the operator and the input data format, Dequantize
4136   // operators may need to be added. For example when the input is
4137   // floating-point but weights are quantized then the weights will first be
4138   // dequantized to the same format as the input before being passed to the
4139   // operator.
4140 
4141   // The tensor determining whether the inputs should be floating-point.
4142   int input_tensor_index = -1;
4143   std::vector<int> inputs_to_potentially_dequantize;
4144 
4145   switch (builtin_code) {
4146     case kTfLiteBuiltinConv2d:
4147     case kTfLiteBuiltinFullyConnected: {
4148       input_tensor_index = 0;
4149       // Weights and bias are inputs #1 and #2 respectively and may require
4150       // dequantization.
4151       inputs_to_potentially_dequantize = {1, 2};
4152       break;
4153     }
4154     case kTfLiteBuiltinLstm: {
4155       input_tensor_index = 0;
4156       inputs_to_potentially_dequantize = {1,  2,  3,  4,  5,  6,  7,
4157                                           8,  9,  10, 11, 12, 13, 14,
4158                                           15, 16, 17, 20, 21, 22, 23};
4159       break;
4160     }
4161     default:
4162       return;
4163   }
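       // Example: a float32 Conv2D whose weights (input #1) are quantized gets
       // a Dequantize inserted on the weights, so the NNAPI op receives
       // float32 weights matching its float32 input.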
4164 
4165   int tensor_id = node->inputs->data[input_tensor_index];
4166   if (tensor_id < 0) return;
4167 
4168   // Nothing to do if the input is not floating-point.
4169   if (!IsFloat(context->tensors[tensor_id].type)) return;
4170 
4171   for (int i : inputs_to_potentially_dequantize) {
4172     if (i < 0 || i >= node->inputs->size) continue;  // Ignore invalid index.
4173     tensor_id = node->inputs->data[i];
4174     if (tensor_id < 0) continue;  // Ignore optional input.
4175 
4176     const TfLiteType type = context->tensors[tensor_id].type;
4177     // Nothing to do for this tensor if it's not quantized.
4178     if (!IsQuantized(type)) continue;
4179 
4180     // Insert Dequantize operator if it hasn't been done already and change
4181     // the node's input accordingly.
4182     builder->AddDequantize(i, node->inputs->data[i], type, tflite_node_index);
4183   }
4184 }
4185 
AddOpsAndTensors(TfLiteContext * context,int * nnapi_errno,bool allow_dynamic_dimensions)4186 TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
4187     TfLiteContext* context, int* nnapi_errno, bool allow_dynamic_dimensions) {
4188   DequantizeMapping dequantize_mapping;
4189   // The operand builder allows creating a single op. It is created outside
4190   // the for loop to avoid reallocating the vectors.
4191   NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
4192                          &dequantize_mapping, &allocation_memory_mapping_,
4193                          &nnapi_to_tflite_op_mapping_, nn_model_.get(),
4194                          nnapi_errno, allow_dynamic_dimensions);
4195   // If we have target accelerators, the target SDK version might be
4196   // different from the current Android version.
4197   target_sdk_version_ = nnapi_->android_sdk_version;
4198   if (!nnapi_devices_.empty()) {
4199     TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
4200         context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno));
4201   }
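       // When compiling for explicit devices, the devices' feature level may
       // be lower than the phone's SDK (e.g. an older accelerator driver), so
       // the mapping decisions below use the devices' version rather than
       // nnapi_->android_sdk_version.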
4202   // Add Tensors.
4203   for (auto node_index : nodes_) {
4204     // Obtain the op and registration.
4205     TfLiteNode* node;
4206     TfLiteRegistration* reg;
4207     TF_LITE_ENSURE_STATUS(
4208         context->GetNodeAndRegistration(context, node_index, &node, &reg));
4209 
4210     // Fully quantized full LSTM.
4211     if (target_sdk_version_ >= kMinSdkVersionForNNAPI13 &&
4212         reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
4213         context->tensors[node->inputs->data[0]].type == kTfLiteInt8) {
4214       const auto quant8_full_lstm_op_code = ANEURALNETWORKS_QUANTIZED_LSTM;
4215 
4216       constexpr int kInputTensor = 0;
4217       constexpr int kInputToInputWeightsTensor = 1;
4218       constexpr int kRecurrentToInputWeightsTensor = 5;
4219       constexpr int kInputGateBiasTensor = 12;
4220       constexpr int kForgetGateBiasTensor = 13;
4221       constexpr int kCellGateBiasTensor = 14;
4222       constexpr int kOutputGateBiasTensor = 15;
4223       constexpr int kProjectionWeightsTensor = 16;
4224       constexpr int kProjectionBiasTensor = 17;
4225       constexpr int kPrevOutputTensor = 18;
4226 
4227       // Add input tensors.
4228       for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
4229         const auto input_index = node->inputs->data[input_pos];
4230         if (input_index == kTfLiteOptionalTensor) {
4231           if (input_pos == kInputToInputWeightsTensor ||
4232               input_pos == kRecurrentToInputWeightsTensor ||
4233               input_pos == kProjectionWeightsTensor) {
4234             TF_LITE_ENSURE_STATUS(builder.AddVectorInt8Operand(nullptr, 0));
4235           } else if (input_pos == kInputGateBiasTensor ||
4236                      input_pos == kForgetGateBiasTensor ||
4237                      input_pos == kCellGateBiasTensor ||
4238                      input_pos == kOutputGateBiasTensor ||
4239                      input_pos == kProjectionBiasTensor) {
4240             TF_LITE_ENSURE_STATUS(builder.AddVectorInt32Operand(nullptr, 0));
4241           } else {  // cell-to-* and layer norm weights.
4242             TF_LITE_ENSURE_STATUS(builder.AddVectorInt16Operand(nullptr, 0));
4243           }
4244         } else {
4245           // Only input and previous output use INT8_ASYM_SIGNED.
4246           int flags =
4247               (input_pos == kInputTensor || input_pos == kPrevOutputTensor)
4248                   ? NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED
4249                   : 0;
4250           TF_LITE_ENSURE_STATUS(
4251               builder.AddTensorInput(input_index, /*hybrid_op=*/false, flags));
4252         }
4253       }
4254 
4255       // Add clip parameters.
4256       auto builtin = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
4257       TF_LITE_ENSURE_STATUS(
4258           builder.AddScalarFloat32Operand(builtin->cell_clip));
4259       TF_LITE_ENSURE_STATUS(
4260           builder.AddScalarFloat32Operand(builtin->proj_clip));
4261 
4262       // Add quantization parameters for intermediate tensors.
4263       TF_LITE_ENSURE_EQ(context, node->intermediates->size, 5);
4264       for (int intermediate_pos = 0;
4265            intermediate_pos < node->intermediates->size; ++intermediate_pos) {
4266         const auto intermediate_index =
4267             node->intermediates->data[intermediate_pos];
4268         const TfLiteTensor& tensor = context->tensors[intermediate_index];
4269         TfLiteAffineQuantization* quantization_params =
4270             static_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
4271         if (intermediate_pos == 4) {
4272           TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4273               quantization_params->zero_point->data[0]));
4274         }
4275         TF_LITE_ENSURE_STATUS(builder.AddScalarFloat32Operand(
4276             quantization_params->scale->data[0]));
4277       }
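           // Only the last intermediate (position 4) supplies a zero point in
           // addition to its scale; the first four contribute just their
           // scales (these appear to be the layer-norm intermediates, with the
           // last one covering the hidden-state quantization).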
4278 
4279       // Activation state output.
4280       int ann_index;
4281       builder.AddStateInt8AsymTensor(
4282           node->inputs->data[/*kInputActivationStateTensor*/ 18], &ann_index);
4283       model_state_outputs_.push_back(ann_index);
4284       model_state_tfl_inputs_.push_back(
4285           node->inputs->data[/*kInputActivationStateTensor*/ 18]);
4286 
4287       // Cell state output.
4288       builder.AddStateInt16Tensor(
4289           node->inputs->data[/*kInputCellStateTensor*/ 19], &ann_index);
4290       model_state_outputs_.push_back(ann_index);
4291       model_state_tfl_inputs_.push_back(
4292           node->inputs->data[/*kInputCellStateTensor*/ 19]);
4293 
4294       // Add output tensors.
4295       for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
4296         const auto output_index = node->outputs->data[output_pos];
4297         TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(
4298             output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
4299       }
4300 
4301       builder.FinalizeAddOperation(quant8_full_lstm_op_code, node_index);
4302       continue;
4303     }
4304 
4305     const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
4306     const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
4307     const bool need_int8_conversion =
4308         target_sdk_version_ < kMinSdkVersionForNNAPI13 &&
4309         NeedInt8Conversion(context, reg->builtin_code, node);
4310     const bool use_int8_asymm_signed =
4311         target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
4312 
4313     int input_tensor_flags = 0;
4314     if (scalar_as_tensor) {
4315       input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
4316     }
4317     if (use_int8_asymm_signed) {
4318       input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
4319     }
4320 
4321     // On SDK levels below 30, h_swish is lowered into supported NNAPI
4322     // operations. From SDK level 30 onwards, h_swish is supported as a
4323     // single operation.
4324     if (reg->builtin_code == kTfLiteBuiltinHardSwish &&
4325         nnapi_->android_sdk_version < kMinSdkVersionForNNAPI13) {
4326       builder.TransformHardSwishIntoSupportedOps(
4327           node->inputs->data[0], node->outputs->data[0], need_int8_conversion,
4328           node_index);
4329       continue;
4330     }
4331     // Map inputs to NN API tensor indices.
4332     for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
4333       if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
4334         // Everything is added during Map since input tensors
4335         // have a different order.
4336         continue;
4337       }
4338       const auto input_index = node->inputs->data[input_pos];
4339       if (need_int8_conversion &&
4340           (input_pos == 0 ||
4341            reg->builtin_code == kTfLiteBuiltinFullyConnected ||
4342            reg->builtin_code == kTfLiteBuiltinConv2d ||
4343            reg->builtin_code == kTfLiteBuiltinDepthwiseConv2d ||
4344            reg->builtin_code == kTfLiteBuiltinAdd ||
4345            reg->builtin_code == kTfLiteBuiltinMul ||
4346            reg->builtin_code == kTfLiteBuiltinSub ||
4347            reg->builtin_code == kTfLiteBuiltinConcatenation ||
4348            reg->builtin_code == kTfLiteBuiltinMaximum ||
4349            reg->builtin_code == kTfLiteBuiltinMinimum ||
4350            reg->builtin_code == kTfLiteBuiltinLeakyRelu ||
4351            reg->builtin_code == kTfLiteBuiltinLess ||
4352            reg->builtin_code == kTfLiteBuiltinLessEqual ||
4353            reg->builtin_code == kTfLiteBuiltinPrelu ||
4354            reg->builtin_code == kTfLiteBuiltinGreater ||
4355            reg->builtin_code == kTfLiteBuiltinGreaterEqual ||
4356            reg->builtin_code == kTfLiteBuiltinEqual ||
4357            reg->builtin_code == kTfLiteBuiltinNotEqual ||
4358            reg->builtin_code == kTfLiteBuiltinSelect)) {
4359         // Only selected inputs require int8 conversion.
4360         TF_LITE_ENSURE_STATUS(builder.AddTensorInput(
4361             input_index, hybrid_op,
4362             input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION));
4363         continue;
4364       }
4365       if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
4366           input_pos >= 20) {
4367         // Skip layer normalization weights. They are added in the Map
4368         // function (after all the other inputs added there) since layer
4369         // normalization weights are the last four inputs of the LSTM op in
4370         // NNAPI.
4371         continue;
4372       }
4373       if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
4374         // Configuring all inputs in the Map function
4375         continue;
4376       }
4377       if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) {
4378         if (input_pos >= 20) {
4379           // Skip layer normalization weights. They are added in the Map
4380           // function (after all the other inputs added there) since layer
4381           // normalization weights are the last four inputs of the
4382           // unidirectional sequence LSTM op in NNAPI.
4383           continue;
4384         }
4385         if (input_index == kTfLiteOptionalTensor) {
4386           TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
4387           continue;
4388         }
4389       }
4390       if ((reg->builtin_code == kTfLiteBuiltinSplit) &&
4391           (input_index == node->inputs->data[0])) {
4392         // Skip the axis input tensor; it will be added as a scalar operand
4393         // by the Map() mapping.
4394         continue;
4395       }
4396 
4397       // Pad and Padv2 have an optional parameter for a pad value which has
4398       // to be converted to a scalar type in NN API.
4399       if ((reg->builtin_code == kTfLiteBuiltinPadv2 ||
4400            reg->builtin_code == kTfLiteBuiltinPad) &&
4401           node->inputs->size == 3 && input_pos == 2) {
4402         const int constant_value_id = node->inputs->data[2];
4403         if (constant_value_id == kTfLiteOptionalTensor) {
4404           continue;
4405         }
4406         const TfLiteTensor constant_value = context->tensors[constant_value_id];
4407 
4408         switch (constant_value.type) {
4409           case kTfLiteFloat32:
4410             if (constant_value.allocation_type == kTfLiteMmapRo) {
4411               builder.AddScalarFloat32Operand(*constant_value.data.f);
4412             } else {
4413               builder.AddSingleValueTensorAsScalarOperand(
4414                   constant_value_id, ANEURALNETWORKS_FLOAT32);
4415             }
4416             break;
4417           case kTfLiteUInt8:
4418             if (constant_value.allocation_type == kTfLiteMmapRo) {
4419               builder.AddScalarInt32Operand(
4420                   static_cast<int32_t>(*constant_value.data.uint8));
4421             } else {
4422               builder.AddSingleValueTensorAsScalarOperand(
4423                   constant_value_id, ANEURALNETWORKS_INT32);
4424             }
4425             break;
4426           case kTfLiteInt8:
4427             if (constant_value.allocation_type == kTfLiteMmapRo) {
4428               if (need_int8_conversion) {
4429                 builder.AddScalarInt32Operand(
4430                     static_cast<int32_t>(*constant_value.data.int8) + 128);
4431               } else {
4432                 builder.AddScalarInt32Operand(*constant_value.data.int8);
4433               }
4434             } else {
4435               builder.AddSingleValueTensorAsScalarOperand(
4436                   constant_value_id, ANEURALNETWORKS_INT32);
4437             }
4438             break;
4439           default:
4440             context->ReportError(context,
4441                                  "Unsupported type of pad value for pad_v2\n");
4442             return kTfLiteError;
4443         }
4444         continue;
4445       }
4446 
4447       if (input_index == kTfLiteOptionalTensor &&
4448           (reg->builtin_code == kTfLiteBuiltinLstm ||
4449            reg->builtin_code == kTfLiteBuiltinSvdf ||
4450            reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) {
4451         // Properly handle the optional tensors for LSTM and SVDF;
4452         // currently only float32 is supported.
4453         TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
4454       } else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear ||
4455                  reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) {
4456         if (input_pos == 0) {
4457           // Only the first input tensor is added. The second one,
4458           // specifying the output height and width, is not added and
4459           // instead the height and width will be added individually as
4460           // scalars by the mapping function returned by Map().
4461           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4462                                                        input_tensor_flags));
4463         }
4464       } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
4465         // The K parameter tensor is not handled here but by the functor
4466         // returned by Map; the input tensor is instead added in
4467         // the else clause below.
4468         continue;
4469       } else if (reg->builtin_code == kTfLiteBuiltinGather) {
4470         // Everything else is added during Map since input tensors
4471         // have a different order.
4472         if (input_pos == 0) {
4473           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4474                                                        input_tensor_flags));
4475         }
4476         continue;
4477       } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
4478                  input_pos == 1) {
4479         // The axis param is added during Map
4480         continue;
4481       } else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd &&
4482                  input_pos == 2) {
4483         // NNAPI does not support crops.
4484         // The Map function will check if all crops are zero.
4485         continue;
4486       } else if (reg->builtin_code == kTfLiteBuiltinArgMin ||
4487                  reg->builtin_code == kTfLiteBuiltinArgMax) {
4488         // The first input tensor is added as is. The second one, specifying
4489         // the axis, needs to be converted to a scalar since TFLite uses a
4490         // tensor but NNAPI uses a scalar as the axis.
4491         if (input_pos == 0) {
4492           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4493                                                        input_tensor_flags));
4494         } else {
4495           const int axis_id = node->inputs->data[1];
4496           const TfLiteTensor& axis_tensor = context->tensors[axis_id];
4497           switch (axis_tensor.type) {
4498             case kTfLiteInt32:
4499               if (axis_tensor.allocation_type == kTfLiteMmapRo) {
4500                 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4501                     static_cast<int32_t>(*axis_tensor.data.i32)));
4502               } else {
4503                 TF_LITE_ENSURE_STATUS(
4504                     builder.AddSingleValueTensorAsScalarOperand(
4505                         axis_id, ANEURALNETWORKS_INT32));
4506               }
4507               break;
4508             case kTfLiteInt64:
4509               // Map() function already makes sure int64 input is constant.
4510               TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4511                   static_cast<int32_t>(*axis_tensor.data.i64)));
4512               break;
4513             default:
4514               return kTfLiteError;
4515           }
4516         }
4517       } else if (reg->builtin_code == kTfLiteBuiltinMaximum ||
4518                  reg->builtin_code == kTfLiteBuiltinMinimum) {
4519         const TfLiteTensor& operand_tensor =
4520             context->tensors[node->inputs->data[input_pos]];
4521         if (operand_tensor.dims->size == 0) {
4522           int tensor_index;
4523 
4524           TF_LITE_ENSURE_EQ(context, operand_tensor.allocation_type,
4525                             kTfLiteMmapRo);
4526           switch (operand_tensor.type) {
4527             case kTfLiteFloat32:
4528               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4529                   ANEURALNETWORKS_TENSOR_FLOAT32, operand_tensor.type, {1},
4530                   std::vector<float>(1, operand_tensor.data.f[0]),
4531                   operand_tensor.params, &tensor_index));
4532               break;
4533             case kTfLiteUInt8:
4534               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4535                   ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, {1},
4536                   std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
4537                   operand_tensor.params, &tensor_index));
4538               break;
4539             case kTfLiteInt8: {
4540               auto params = operand_tensor.params;
4541               if (params.scale == 0.0) {
4542                 params.scale = 1.0;
4543               }
4544 
4545               if (use_int8_asymm_signed) {
4546                 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4547                     ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
4548                     operand_tensor.type, {1},
4549                     std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
4550                     &tensor_index));
4551               } else {
4552                 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4553                     ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
4554                     {1},
4555                     std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
4556                     params, &tensor_index));
4557               }
4558             } break;
4559             case kTfLiteInt32:
4560               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4561                   ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
4562                   std::vector<int32_t>(1, operand_tensor.data.i32[0]),
4563                   operand_tensor.params, &tensor_index));
4564               break;
4565             default:
4566               return kTfLiteError;
4567           }
4568         } else {
4569           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4570                                                        input_tensor_flags));
4571         }
4572       } else if ((reg->builtin_code == kTfLiteBuiltinReduceAny ||
4573                   reg->builtin_code == kTfLiteBuiltinReduceMax ||
4574                   reg->builtin_code == kTfLiteBuiltinReduceMin ||
4575                   reg->builtin_code == kTfLiteBuiltinReduceProd ||
4576                   reg->builtin_code == kTfLiteBuiltinSum) &&
4577                  (input_pos == 1)) {
4578         // The axis needs to be converted to a tensor if it is specified as a scalar.
4579         const TfLiteTensor& axis_tensor =
4580             context->tensors[node->inputs->data[input_pos]];
4581         if (axis_tensor.dims->size == 0) {
4582           TF_LITE_ENSURE_STATUS(
4583               builder.AddVectorInt32Operand(axis_tensor.data.i32, 1));
4584         } else {
4585           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4586                                                        input_tensor_flags));
4587         }
4588       } else if (reg->builtin_code == kTfLiteBuiltinFill) {
4589         if (input_pos == 0) {
4590           const int dims_id = node->inputs->data[0];
4591           const TfLiteTensor& dims_tensor = context->tensors[dims_id];
4592           switch (dims_tensor.type) {
4593             case kTfLiteInt32:
4594               TF_LITE_ENSURE_STATUS(
4595                   builder.AddTensorInput(input_index, hybrid_op));
4596               break;
4597             case kTfLiteInt64: {
4598               // We made sure that dimensions are constant and fit into int32
4599               // in Map(), so we can safely create a new tensor with casted
4600               // values.
4601               const int dims_size = dims_tensor.dims->data[0];
4602               std::vector<int32_t> dims_int32(dims_size);
4603               std::copy(dims_tensor.data.i64, dims_tensor.data.i64 + dims_size,
4604                         dims_int32.begin());
4605               int new_tensor_index = -1;
4606               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4607                   ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, dims_tensor.dims,
4608                   dims_int32, dims_tensor.params, &new_tensor_index));
4609             } break;
4610             default:
4611               return kTfLiteError;
4612           }
4613         } else {
4614           const int value_id = node->inputs->data[1];
4615           const TfLiteTensor& value_tensor = context->tensors[value_id];
4616           switch (value_tensor.type) {
4617             case kTfLiteFloat32:
4618               TF_LITE_ENSURE_STATUS(
4619                   builder.AddScalarFloat32Operand(*value_tensor.data.f));
4620               break;
4621             case kTfLiteInt32:
4622               TF_LITE_ENSURE_STATUS(
4623                   builder.AddScalarInt32Operand(*value_tensor.data.i32));
4624               break;
4625             case kTfLiteInt64:
4626               // Map() function already makes sure int64 input is constant and
4627               // fits into int32.
4628               TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4629                   static_cast<int32_t>(*value_tensor.data.i64)));
4630               break;
4631             default:
4632               return kTfLiteError;
4633           }
4634         }
4635       } else {
4636         TF_LITE_ENSURE_STATUS(
4637             builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
4638       }
4639     }
4640 
4641     // Get the NNAPI op type and operands for this node. This fails if the
4642     // Validate function failed.
4643     int nn_op_type;
4644     TF_LITE_ENSURE_STATUS(
4645         Map(context, reg->builtin_code, reg->version, target_sdk_version_,
4646             {context, &builder, node, node_index, &model_state_outputs_,
4647              &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
4648             &nn_op_type));
4649 
4650     // Map outputs to NN API tensor indices.
4651     int output_tensor_flags = 0;
4652     if (need_int8_conversion) {
4653       output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
4654     }
4655     if (use_int8_asymm_signed) {
4656       output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
4657     }
4658     for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
4659       const auto output_index = node->outputs->data[output_pos];
4660 
4661       // Outputs for the basic LSTM cell are set in the Map function, so skip them here.
4662       if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
4663         continue;
4664       }
4665 
4666       TF_LITE_ENSURE_STATUS(
4667           builder.AddTensorOutput(output_index, output_tensor_flags));
4668     }
4669 
4670     // Dequantize operators may have to be added in case inputs need to be
4671     // converted to floating point.
4672     AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
4673                                       node_index, &builder, nnapi_errno);
4674 
4675     TF_LITE_ENSURE_OK(context_,
4676                       builder.FinalizeAddOperation(nn_op_type, node_index));
4677   }
4678   return kTfLiteOk;
4679 }
4680 
4681 TfLiteStatus NNAPIDelegateKernel::BuildGraph(
4682     TfLiteContext* context,
4683     const StatefulNnApiDelegate::Options& delegate_options,
4684     const TfLiteIntArray* input_tensors, const TfLiteIntArray* output_tensors,
4685     int* nnapi_errno) {
4686   // Build the ops and tensors.
4687   TF_LITE_ENSURE_STATUS(AddOpsAndTensors(
4688       context, nnapi_errno, delegate_options.allow_dynamic_dimensions));
4689   // Map input and output tensor indices to ANN tensor indices.
4690   std::vector<uint32_t> inputs;
4691   inputs.reserve(input_tensors->size);
4692   std::vector<uint32_t> outputs;
4693   outputs.reserve(output_tensors->size);
4694 
4695   size_t total_input_byte_size = 0;
4696   // Map the TensorFlow Lite inputs and outputs to ann_indices.
4697   for (int i : TfLiteIntArrayView(input_tensors)) {
4698     // Constant tensors are not NNAPI inputs.
4699     if (i != kTfLiteOptionalTensor &&
4700         context->tensors[i].allocation_type != kTfLiteMmapRo &&
4701         // The delegate might not have mapped this input (this can
4702         // happen if one tensor is split into several ones).
4703         operand_mapping_.lite_index_to_ann(i) != -1) {
4704       inputs.push_back(operand_mapping_.lite_index_to_ann(i));
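      // Inputs with a registered NNAPI buffer handle don't use the shared
      // input memory pool, so skip the size accounting for them.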
4705       if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4706         continue;
4707       }
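      // If the input's type is converted before being passed to NNAPI, size
      // its region in the pool using the converted element type.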
4708       const TfLiteType nn_type_conversion =
4709           operand_mapping_.lite_index_to_ann_type_conversion(i);
4710       int tensor_size = 0;
4711       if (nn_type_conversion == kTfLiteNoType) {
4712         tensor_size = context->tensors[i].bytes;
4713       } else {
4714         size_t type_size;
4715         TF_LITE_ENSURE_OK(
4716             context, GetSizeOfType(context, nn_type_conversion, &type_size));
4717         tensor_size = NumElements(&context->tensors[i]) * type_size;
4718       }
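      // Reserve the tensor's bytes plus alignment padding in the shared
      // input memory pool.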
4719       total_input_byte_size += tensor_size;
4720       total_input_byte_size += getNumPaddingBytes(tensor_size);
4721     }
4722   }
4723 
4724   size_t total_output_byte_size = 0;
4725   for (int i : TfLiteIntArrayView(output_tensors)) {
4726     const int output_tensor_ann_index = operand_mapping_.lite_index_to_ann(i);
4727     // Unmapped outputs are not added
4728     if (output_tensor_ann_index != -1) {
4729       outputs.push_back(output_tensor_ann_index);
4730     }
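    // Outputs with a registered NNAPI buffer handle don't use the shared
    // output memory pool, so skip the size accounting for them.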
4731     if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4732       continue;
4733     }
4734     total_output_byte_size += context->tensors[i].bytes;
4735     total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
4736   }
4737 
4738   // Add state output tensors as model outputs.
4739   for (int i : model_state_outputs_) {
4740     outputs.push_back(i);
4741   }
4742 
4743   // Tell ANN to declare inputs/outputs
4744   RETURN_TFLITE_ERROR_IF_NN_ERROR(
4745       context,
4746       nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
4747           nn_model_.get(), inputs.size(), inputs.data(), outputs.size(),
4748           outputs.data()),
4749       "identifying model inputs and outputs", nnapi_errno);
4750 
4751   auto allow_fp16 =
4752       context->allow_fp32_relax_to_fp16 || delegate_options.allow_fp16;
4753   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
4754     RETURN_TFLITE_ERROR_IF_NN_ERROR(
4755         context,
4756         nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
4757             nn_model_.get(), allow_fp16),
4758         "set relaxed computation mode for fp32 if possible", nnapi_errno);
4759   }
4760 
4761   RETURN_TFLITE_ERROR_IF_NN_ERROR(
4762       context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()),
4763       "finalizing the model", nnapi_errno);
4764 
4765   // Create shared memory pool for inputs and outputs.
4766   nn_input_memory_.reset(
4767       new NNMemory(nnapi_, "input_pool", total_input_byte_size));
4768   nn_output_memory_.reset(
4769       new NNMemory(nnapi_, "output_pool", total_output_byte_size));
4770 
4771   return kTfLiteOk;
4772 }
4773 
4774 }  // namespace nnapi
4775 }  // namespace delegate
4776 
4777 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
4778 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI11;
4779 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
4780 using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
4781 
4782 StatefulNnApiDelegate::Data::Data(const NnApi* nnapi) : nnapi(nnapi) {}
4783 
4784 StatefulNnApiDelegate::Data::~Data() {
4785   std::for_each(std::begin(delegate_state_cache),
4786                 std::end(delegate_state_cache),
4787                 [](const std::pair<int, NNAPIDelegateKernel*>& entry) {
4788                   delete entry.second;
4789                 });
4790 }
4791 
4792 void StatefulNnApiDelegate::Data::CacheDelegateKernel(
4793     const TfLiteDelegateParams* delegate_params,
4794     NNAPIDelegateKernel* delegate_state) {
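  // The index of the first node in the partition serves as the cache key.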
4795   const int cache_key = delegate_params->nodes_to_replace->data[0];
4796   delegate_state_cache.emplace(cache_key, delegate_state);
4797 }
4798 
4799 NNAPIDelegateKernel* StatefulNnApiDelegate::Data::MaybeGetCachedDelegateKernel(
4800     const TfLiteDelegateParams* delegate_params) {
4801   const int cache_key = delegate_params->nodes_to_replace->data[0];
4802   const auto cached_state = delegate_state_cache.find(cache_key);
4803   if (cached_state != std::end(delegate_state_cache)) {
4804     auto result = cached_state->second;
4805     delegate_state_cache.erase(cached_state);
4806     return result;
4807   } else {
4808     return nullptr;
4809   }
4810 }
4811 
4812 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi)
4813     : StatefulNnApiDelegate(nnapi, Options()) {}
4814 
4815 StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
4816     : StatefulNnApiDelegate(NnApiImplementation(), options) {}
4817 
4818 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
4819                                              Options options)
4820     : TfLiteDelegate(TfLiteDelegateCreate()), delegate_data_(nnapi) {
4821   if (options.accelerator_name) {
4822     delegate_data_.accelerator_name = options.accelerator_name;
4823   }
4824   if (options.cache_dir) {
4825     delegate_data_.cache_dir = options.cache_dir;
4826   }
4827   if (options.model_token) {
4828     delegate_data_.model_token = options.model_token;
4829   }
4830   delegate_data_.execution_preference = options.execution_preference;
4831   delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
4832   delegate_data_.max_number_delegated_partitions =
4833       options.max_number_delegated_partitions;
4834   delegate_data_.allow_fp16 = options.allow_fp16;
4835   delegate_data_.execution_priority = options.execution_priority;
4836   delegate_data_.max_compilation_timeout_duration_ns =
4837       options.max_compilation_timeout_duration_ns;
4838   delegate_data_.max_execution_timeout_duration_ns =
4839       options.max_execution_timeout_duration_ns;
4840   delegate_data_.max_execution_loop_timeout_duration_ns =
4841       options.max_execution_loop_timeout_duration_ns;
4842   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) {
4843     delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions;
4844   }
4845   TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
4846                        "Created TensorFlow Lite delegate for NNAPI.");
4847   Prepare = DoPrepare;
4848   CopyFromBufferHandle = DoCopyFromBufferHandle;
4849   CopyToBufferHandle = DoCopyToBufferHandle;
4850   FreeBufferHandle = DoFreeBufferHandle;
4851   data_ = &delegate_data_;
4852   if (delegate_data_.allow_dynamic_dimensions) {
4853     flags |= kTfLiteDelegateFlagsAllowDynamicTensors;
4854     flags |= kTfLiteDelegateFlagsRequirePropagatedShapes;
4855   }
4856 }
4857 
4858 StatefulNnApiDelegate::StatefulNnApiDelegate()
4859     : StatefulNnApiDelegate(Options()) {}
4860 
4861 const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
4862     TfLiteDelegate* delegate) {
4863   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
4864   StatefulNnApiDelegate::Options options;
4865   options.execution_preference = delegate_data->execution_preference;
4866   options.accelerator_name = delegate_data->accelerator_name.empty()
4867                                  ? nullptr
4868                                  : delegate_data->accelerator_name.c_str();
4869   options.cache_dir = delegate_data->cache_dir.empty()
4870                           ? nullptr
4871                           : delegate_data->cache_dir.c_str();
4872   options.model_token = delegate_data->model_token.empty()
4873                             ? nullptr
4874                             : delegate_data->model_token.c_str();
4875   options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
4876   options.max_number_delegated_partitions =
4877       delegate_data->max_number_delegated_partitions;
4878   options.allow_fp16 = delegate_data->allow_fp16;
4879   options.execution_priority = delegate_data->execution_priority;
4880   options.max_compilation_timeout_duration_ns =
4881       delegate_data->max_compilation_timeout_duration_ns;
4882   options.max_execution_timeout_duration_ns =
4883       delegate_data->max_execution_timeout_duration_ns;
4884   options.max_execution_loop_timeout_duration_ns =
4885       delegate_data->max_execution_loop_timeout_duration_ns;
4886   options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions;
4887   return options;
4888 }
4889 
4890 const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
4891 StatefulNnApiDelegate::GetTensorMemoryMap(TfLiteDelegate* delegate) {
4892   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
4893   return delegate_data->tensor_memory_map;
4894 }
4895 
4896 TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory(
4897     ANeuralNetworksMemory* memory, CopyToHostTensorFnPtr callback,
4898     void* callback_context) {
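  // Reuse a slot previously released by DoFreeBufferHandle if one exists;
  // otherwise append a new registration. The returned index is the handle.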
4899   int map_size = delegate_data_.tensor_memory_map.size();
4900   for (int i = 0; i < map_size; i++) {
4901     if (delegate_data_.tensor_memory_map[i].memory == nullptr) {
4902       delegate_data_.tensor_memory_map[i] = {memory, callback,
4903                                              callback_context};
4904       return i;
4905     }
4906   }
4907   delegate_data_.tensor_memory_map.push_back(
4908       {memory, callback, callback_context});
4909   return map_size;
4910 }
4911 
4912 TfLiteStatus StatefulNnApiDelegate::DoCopyFromBufferHandle(
4913     TfLiteContext* context, TfLiteDelegate* delegate,
4914     TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
4915   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
4916   if (buffer_handle < 0 ||
4917       buffer_handle >= delegate_data->tensor_memory_map.size()) {
4918     return kTfLiteError;
4919   }
4920   auto memory = delegate_data->tensor_memory_map[buffer_handle].memory;
4921   auto callback = delegate_data->tensor_memory_map[buffer_handle].callback;
4922   auto callback_context =
4923       delegate_data->tensor_memory_map[buffer_handle].callback_context;
4924   if (!memory || !callback) {
4925     return kTfLiteError;
4926   }
4927   return callback(tensor, memory, 0, tensor->bytes, callback_context);
4928 }
4929 
4930 TfLiteStatus StatefulNnApiDelegate::DoCopyToBufferHandle(
4931     TfLiteContext* context, TfLiteDelegate* delegate,
4932     TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
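  // Copying data into an NNAPI buffer handle is not supported.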
4933   return kTfLiteError;
4934 }
4935 
4936 void StatefulNnApiDelegate::DoFreeBufferHandle(TfLiteContext* context,
4937                                                TfLiteDelegate* delegate,
4938                                                TfLiteBufferHandle* handle) {
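  // Clear the registration so its slot can be reused by RegisterNnapiMemory,
  // and invalidate the caller's handle.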
4939   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
4940   if (*handle >= 0 && *handle < delegate_data->tensor_memory_map.size()) {
4941     delegate_data->tensor_memory_map[*handle] = {nullptr, nullptr, nullptr};
4942     *handle = kTfLiteNullBufferHandle;
4943   }
4944 }
4945 
4946 int StatefulNnApiDelegate::GetNnApiErrno() const {
4947   return delegate_data_.nnapi_errno;
4948 }
4949 
4950 // static
4951 TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
4952     TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
4953     const std::vector<int>& supported_nodes,
4954     std::vector<int>* device_supported_nodes, int* num_partitions,
4955     TfLiteDelegateParams** params_array, int* nnapi_errno) {
4956   auto* delegate_data = static_cast<Data*>(delegate->data_);
4957   // The first field of the TfLiteIntArray built below is the element count.
4958 
4959   auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
4960   TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
4961       context, supported_nodes_int_array.get(), params_array, num_partitions));
4962   // For each partition, check which nodes are actually supported by the
4963   // target accelerators.
4964   delegate_data->delegate_state_cache.clear();
4965   for (int idx = 0; idx < *num_partitions; idx++) {
4966     const auto& partition_params = (*params_array)[idx];
4967     std::unique_ptr<NNAPIDelegateKernel> kernel_state(
4968         new NNAPIDelegateKernel(nnapi));
4969     TfLiteDelegateParams params_with_delegate = partition_params;
4970     params_with_delegate.delegate = delegate;
4971     TF_LITE_ENSURE_STATUS(
4972         kernel_state->Init(context, &params_with_delegate, nnapi_errno));
4973     std::vector<int> supported_partition_nodes;
4974     TF_LITE_ENSURE_STATUS(
4975         kernel_state->GetOperationsSupportedByTargetNnApiDevices(
4976             context, &supported_partition_nodes, nnapi_errno));
4977     device_supported_nodes->insert(device_supported_nodes->end(),
4978                                    supported_partition_nodes.begin(),
4979                                    supported_partition_nodes.end());
4980 
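    // If the accelerator supports the whole partition, cache the initialized
    // kernel so it can be reused when the delegate kernel is created.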
4981     bool model_fully_supported = (supported_partition_nodes.size() ==
4982                                   partition_params.nodes_to_replace->size);
4983     if (model_fully_supported) {
4984       delegate_data->CacheDelegateKernel(&partition_params,
4985                                          kernel_state.release());
4986     }
4987   }
4988 
4989   if (device_supported_nodes->size() != supported_nodes.size()) {
4990     // We changed the set of nodes to delegate; this will create a different
4991     // partitioning layout, so the partitioning has to be previewed again.
4992     auto device_sup_nodes_int_array =
4993         BuildTfLiteIntArray(*device_supported_nodes);
4994     TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
4995         context, device_sup_nodes_int_array.get(), params_array,
4996         num_partitions));
4997   }
4998 
4999   return kTfLiteOk;
5000 }
5001 
5002 // static
5003 TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
5004     int max_partitions,
5005     std::vector<TfLiteDelegateParams> partition_params_array,
5006     std::vector<int>* nodes_to_delegate) {
5007   int num_partitions = partition_params_array.size();
5008   if (max_partitions <= 0 || num_partitions <= max_partitions) {
5009     return kTfLiteOk;
5010   }
5011 
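  // Count the partitions currently selected for delegation; each partition is
  // identified by its first node.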
5012   int number_delegated_partitions = std::count_if(
5013       partition_params_array.begin(), partition_params_array.end(),
5014       [nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
5015         return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
5016                          partition_params.nodes_to_replace->data[0]) !=
5017                nodes_to_delegate->end();
5018       });
5019 
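  // Too many partitions would be delegated: keep only the max_partitions
  // largest ones (by node count) and rebuild the list of delegated nodes.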
5020   if (number_delegated_partitions > max_partitions) {
5021     std::sort(partition_params_array.begin(), partition_params_array.end(),
5022               [](const TfLiteDelegateParams& left,
5023                  const TfLiteDelegateParams& right) -> bool {
5024                 // Reverse sort
5025                 return left.nodes_to_replace->size >
5026                        right.nodes_to_replace->size;
5027               });
5028 
5029     nodes_to_delegate->clear();
5030 
5031     for (int i = 0; i < max_partitions; i++) {
5032       const TfLiteDelegateParams& partition_params = partition_params_array[i];
5033 
5034       nodes_to_delegate->insert(nodes_to_delegate->end(),
5035                                 partition_params.nodes_to_replace->data,
5036                                 partition_params.nodes_to_replace->data +
5037                                     partition_params.nodes_to_replace->size);
5038     }
5039   }
5040 
5041   return kTfLiteOk;
5042 }
5043 
5044 TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
5045                                               TfLiteDelegate* delegate) {
5046   auto* delegate_data = static_cast<Data*>(delegate->data_);
5047   int* nnapi_errno = &(delegate_data->nnapi_errno);
5048   const NnApi* nnapi = delegate_data->nnapi;
5049 
5050   // Reset the error code when the delegate is initialized by TFLite. This
5051   // allows the error to be cleared when reusing the same
5052   // StatefulNnApiDelegate after a failure.
5053   *nnapi_errno = 0;
5054 
5055   // Do not check nodes_ if NN API is unavailable.
5056   if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
5057       !nnapi->nnapi_exists) {
5058     return kTfLiteOk;
5059   }
5060 
5061   int target_sdk_version = nnapi->android_sdk_version;
5062   const StatefulNnApiDelegate::Options delegate_options =
5063       StatefulNnApiDelegate::GetOptions(delegate);
5064   // For NNAPI 1.2+, check if there is any accelerator available.
5065   // If not, don't delegate to NNAPI's CPU reference implementation unless
5066   // it has been specified as the target accelerator.
5067   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
5068     if (ShouldUseTargetDevices(delegate_options, nnapi)) {
5069       std::vector<ANeuralNetworksDevice*> devices;
5070       TF_LITE_ENSURE_STATUS(
5071           GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
5072 
5073       if (devices.empty()) {
5074         if (delegate_options.accelerator_name) {
5075           // There was a selected device and it is not available.
5076           return kTfLiteError;
5077         } else {
5078           // Only nnapi-reference is available, but it was disabled by the
5079           // delegate options.
5080           return kTfLiteOk;
5081         }
5082       }
5083 
5084       TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
5085           context, nnapi, devices, &target_sdk_version, nnapi_errno));
5086     } else {
5087       // If no accelerator is specified, only use NNAPI if an accelerator is
5088       // available. Any available accelerator will make the device_count larger
5089       // than 1. More sophisticated check and allowlisting can be added later.
5090       uint32_t device_count = 0;
5091       RETURN_TFLITE_ERROR_IF_NN_ERROR(
5092           context, nnapi->ANeuralNetworks_getDeviceCount(&device_count),
5093           "getting number of NNAPI devices", nnapi_errno);
5094       if (device_count <= 1) {
5095         return kTfLiteOk;
5096       }
5097     }
5098   }
5099 
5100   std::vector<int> supported_nodes;
5101   // We don't care about all nodes_; we only care about the ones in the
5102   // current execution plan.
5103   TfLiteIntArray* plan;
5104   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
5105 
5106   // Check, for every node in the plan, whether it is supported.
5107   const bool is_accelerator_specified = ShouldUseTargetDevices(
5108       delegate_options, nnapi, /*exclude_nnapi_reference=*/true);
5109   for (int node_index : TfLiteIntArrayView(plan)) {
5110     TfLiteNode* node;
5111     TfLiteRegistration* registration;
5112     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5113         context, node_index, &node, &registration));
5114     if (NNAPIDelegateKernel::Validate(context, registration->builtin_code,
5115                                       registration->version, target_sdk_version,
5116                                       node, is_accelerator_specified)) {
5117       supported_nodes.push_back(node_index);
5118     }
5119   }
5120 
5121   // If there are no delegated nodes, short-circuit node replacement.
5122   if (supported_nodes.empty()) {
5123     return kTfLiteOk;
5124   }
5125 
5126   // NN API Delegate Registration (the pseudo kernel that will invoke NN
5127   // API node subsets).
5128   static const TfLiteRegistration nnapi_delegate_kernel = {
5129       .init = [](TfLiteContext* context, const char* buffer,
5130                  size_t length) -> void* {
5131         const TfLiteDelegateParams* params =
5132             reinterpret_cast<const TfLiteDelegateParams*>(buffer);
5133 
5134         auto* delegate_data = static_cast<Data*>(params->delegate->data_);
5135         int* nnapi_errno = &(delegate_data->nnapi_errno);
5136 
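        // Reuse the kernel cached while querying accelerator support in
        // DoPrepare, if one is available for this partition.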
5137         NNAPIDelegateKernel* kernel_state =
5138             delegate_data->MaybeGetCachedDelegateKernel(params);
5139         if (!kernel_state) {
5140           kernel_state = new NNAPIDelegateKernel(delegate_data->nnapi);
5141           kernel_state->Init(context, params, nnapi_errno);
5142         }
5143 
5144         return kernel_state;
5145       },
5146 
5147       .free = [](TfLiteContext* context, void* buffer) -> void {
5148         delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
5149       },
5150 
5151       .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
5152         NNAPIDelegateKernel* state =
5153             reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
5154         int* nnapi_errno =
5155             &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
5156         return state->Prepare(context, node, nnapi_errno);
5157       },
5158 
5159       .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
5160         NNAPIDelegateKernel* state =
5161             reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
5162         int* nnapi_errno =
5163             &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
5164         return state->Invoke(context, node, nnapi_errno);
5165       },
5166 
5167       .profiling_string = nullptr,
5168       .builtin_code = kTfLiteBuiltinDelegate,
5169       .custom_name = "TfLiteNnapiDelegate",
5170       .version = 1,
5171   };
5172 
5173   std::vector<int> nodes_to_delegate;
5174 
5175   int num_partitions;
5176   TfLiteDelegateParams* params_array;
5177   if (is_accelerator_specified &&
5178       nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
5179     // Filter out nodes that are not supported by the target accelerators.
5180     // Supported operations cannot be queried before NNAPI 1.2.
5181     TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
5182         context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
5183         &num_partitions, &params_array, nnapi_errno));
5184   } else {
5185     nodes_to_delegate = supported_nodes;
5186     auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
5187     TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
5188         context, supported_nodes_int_array.get(), &params_array,
5189         &num_partitions));
5190   }
5191 
5192   TF_LITE_ENSURE_STATUS(
5193       LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions,
5194                                std::vector<TfLiteDelegateParams>(
5195                                    params_array, params_array + num_partitions),
5196                                &nodes_to_delegate));
5197 
5198   if (nodes_to_delegate.empty()) {
5199     return kTfLiteOk;
5200   } else {
5201     // Request TFLite to partition the graph and create a new
5202     // nnapi_delegate_kernel for each independent node subset.
5203     auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);
5204     return context->ReplaceNodeSubsetsWithDelegateKernels(
5205         context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
5206         delegate);
5207   }
5208 }
5209 
5210 // Returns a singleton NNAPI Delegate that can check for support of ops.
5211 TfLiteDelegate* NnApiDelegate() {
5212   static StatefulNnApiDelegate* delegate = new StatefulNnApiDelegate();
5213   return delegate;
5214 }
5215 
5216 }  // namespace tflite
5217