1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
16
17 #include <algorithm>
18 #include <cstdarg>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstdio>
22 #include <cstring>
23 #include <functional>
24 #include <initializer_list>
25 #include <iostream>
26 #include <iterator>
27 #include <map>
28 #include <memory>
29 #include <string>
30 #include <tuple>
31 #include <utility>
32 #include <vector>
33
34 #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
35
36 #ifdef __ANDROID__
37 #include <sys/system_properties.h>
38 #endif
39
40 #if defined __ANDROID__ || defined __unix__
41 #define TFLITE_NNAPI_ALLOW_MMAP_SHARING
42 #include <sys/mman.h>
43 #include <unistd.h>
44 #endif
45
46 #include "tensorflow/lite/allocation.h"
47 #include "tensorflow/lite/builtin_op_data.h"
48 #include "tensorflow/lite/builtin_ops.h"
49 #include "tensorflow/lite/c/builtin_op_data.h"
50 #include "tensorflow/lite/c/common.h"
51 #include "tensorflow/lite/context_util.h"
52 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"
53 #include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h"
54 #include "tensorflow/lite/delegates/utils.h"
55 #include "tensorflow/lite/kernels/kernel_util.h"
56 #include "tensorflow/lite/minimal_logging.h"
57 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
58 #include "tensorflow/lite/nnapi/nnapi_util.h"
59 #include "tensorflow/lite/util.h"
60 #include "utils/hash/farmhash.h"
61
62 namespace tflite {
63 namespace {
64
65 // Returns the enum name corresponding to the given error code if the given
66 // value corresponds to an of the error codes in the enumeration above or
67 // an message with the unknown code.
68 // LINT.IfChange(NnApiErrorDescription)
NnApiErrorDescription(int error_code)69 std::string NnApiErrorDescription(int error_code) {
70 switch (error_code) {
71 case ANEURALNETWORKS_NO_ERROR:
72 return "ANEURALNETWORKS_NO_ERROR";
73 case ANEURALNETWORKS_OUT_OF_MEMORY:
74 return "ANEURALNETWORKS_OUT_OF_MEMORY";
75 case ANEURALNETWORKS_INCOMPLETE:
76 return "ANEURALNETWORKS_INCOMPLETE";
77 case ANEURALNETWORKS_UNEXPECTED_NULL:
78 return "ANEURALNETWORKS_UNEXPECTED_NULL";
79 case ANEURALNETWORKS_BAD_DATA:
80 return "ANEURALNETWORKS_BAD_DATA";
81 case ANEURALNETWORKS_OP_FAILED:
82 return "ANEURALNETWORKS_OP_FAILED";
83 case ANEURALNETWORKS_BAD_STATE:
84 return "ANEURALNETWORKS_BAD_STATE";
85 case ANEURALNETWORKS_UNMAPPABLE:
86 return "ANEURALNETWORKS_UNMAPPABLE";
87 case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE:
88 return "ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE";
89 case ANEURALNETWORKS_UNAVAILABLE_DEVICE:
90 return "ANEURALNETWORKS_UNAVAILABLE_DEVICE";
91 case ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT:
92 return "ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT";
93 case ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT:
94 return "ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT";
95 case ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT:
96 return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT";
97 case ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT:
98 return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT";
99 case ANEURALNETWORKS_DEAD_OBJECT:
100 return "ANEURALNETWORKS_DEAD_OBJECT";
101 default:
102 return "Unknown NNAPI error code: " + std::to_string(error_code);
103 }
104 }
105 // LINT.ThenChange()
106
107 #define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code, call_desc, p_errno) \
108 do { \
109 const auto _code = (code); \
110 const auto _call_desc = (call_desc); \
111 if (_code != ANEURALNETWORKS_NO_ERROR) { \
112 const auto error_desc = NnApiErrorDescription(_code); \
113 TF_LITE_KERNEL_LOG(context, \
114 "NN API returned error %s at line %d while %s.\n", \
115 error_desc.c_str(), __LINE__, _call_desc); \
116 *p_errno = _code; \
117 return kTfLiteError; \
118 } \
119 } while (0)
120
121 #define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(context, code, call_desc, \
122 p_tensor, p_errno) \
123 do { \
124 const auto _code = (code); \
125 const auto _call_desc = (call_desc); \
126 if (_code != ANEURALNETWORKS_NO_ERROR) { \
127 const auto error_desc = NnApiErrorDescription(_code); \
128 TF_LITE_KERNEL_LOG(context, \
129 "NN API returned error %s at line %d while %s " \
130 "for tensor '%s'.\n", \
131 error_desc.c_str(), __LINE__, _call_desc, \
132 (p_tensor)->name ? (p_tensor)->name : "no-name"); \
133 *p_errno = _code; \
134 return kTfLiteError; \
135 } \
136 } while (0)
137
IsFloat(TfLiteType type)138 bool IsFloat(TfLiteType type) {
139 switch (type) {
140 case kTfLiteFloat32:
141 return true;
142 default:
143 return false;
144 }
145 }
146
IsFloatOrUInt8(TfLiteType type)147 bool IsFloatOrUInt8(TfLiteType type) {
148 switch (type) {
149 case kTfLiteFloat32:
150 case kTfLiteUInt8:
151 return true;
152 default:
153 return false;
154 }
155 }
156
IsQuantized(TfLiteType type)157 bool IsQuantized(TfLiteType type) {
158 switch (type) {
159 case kTfLiteUInt8:
160 case kTfLiteInt8:
161 return true;
162 default:
163 // kTfLiteInt16 isn't supported as quantized type yet.
164 return false;
165 }
166 }
167
IsInt32(TfLiteType type)168 bool IsInt32(TfLiteType type) {
169 switch (type) {
170 case kTfLiteInt32:
171 return true;
172 default:
173 return false;
174 }
175 }
176
IsFloatOrQuantized(TfLiteType type)177 bool IsFloatOrQuantized(TfLiteType type) {
178 switch (type) {
179 case kTfLiteFloat32:
180 case kTfLiteUInt8:
181 case kTfLiteInt8:
182 return true;
183 default:
184 return false;
185 }
186 }
187
IsFloatOrInt32(TfLiteType type)188 bool IsFloatOrInt32(TfLiteType type) {
189 switch (type) {
190 case kTfLiteFloat32:
191 case kTfLiteInt32:
192 return true;
193 default:
194 return false;
195 }
196 }
197
IsFloatQuantizedOrInt32(TfLiteType type)198 bool IsFloatQuantizedOrInt32(TfLiteType type) {
199 switch (type) {
200 case kTfLiteFloat32:
201 case kTfLiteUInt8:
202 case kTfLiteInt8:
203 case kTfLiteInt32:
204 return true;
205 default:
206 return false;
207 }
208 }
209
IsScalarInputSupported(int builtin_code)210 bool IsScalarInputSupported(int builtin_code) {
211 switch (builtin_code) {
212 case kTfLiteBuiltinAdd:
213 case kTfLiteBuiltinMul:
214 case kTfLiteBuiltinSub:
215 case kTfLiteBuiltinDiv:
216 case kTfLiteBuiltinEqual:
217 case kTfLiteBuiltinNotEqual:
218 case kTfLiteBuiltinGreater:
219 case kTfLiteBuiltinGreaterEqual:
220 case kTfLiteBuiltinLess:
221 case kTfLiteBuiltinLessEqual:
222 case kTfLiteBuiltinPow:
223 case kTfLiteBuiltinMaximum:
224 case kTfLiteBuiltinMinimum:
225 case kTfLiteBuiltinPrelu:
226 case kTfLiteBuiltinLeakyRelu:
227 return true;
228 default:
229 return false;
230 }
231 }
232
233 // Check if the operation requires explicit conversion from int8 to uint8
234 // values.
NeedInt8Conversion(const TfLiteContext * context,int builtin_code,const TfLiteNode * node)235 bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
236 const TfLiteNode* node) {
237 const int input_id = node->inputs->data[0];
238 const TfLiteType input_type = context->tensors[input_id].type;
239 switch (builtin_code) {
240 case kTfLiteBuiltinConv2d:
241 case kTfLiteBuiltinDepthwiseConv2d:
242 case kTfLiteBuiltinFullyConnected: {
243 if (input_type == kTfLiteInt8) {
244 const int weights_id = node->inputs->data[1];
245 const auto& weights_tensor = context->tensors[weights_id];
246 if ((weights_tensor.type == kTfLiteInt8 ||
247 weights_tensor.type == kTfLiteUInt8) &&
248 weights_tensor.quantization.type == kTfLiteAffineQuantization) {
249 return true;
250 }
251 }
252 return false;
253 }
254 case kTfLiteBuiltinTransposeConv: {
255 // Transpose convolution has a different order of inputs:
256 // 0: output_shape, 1: filter, 2: input, 3: bias.
257 const int input_id = 2;
258 const TfLiteType input_type = context->tensors[input_id].type;
259 if (input_type == kTfLiteInt8) {
260 return true;
261 }
262 return false;
263 }
264 case kTfLiteBuiltinSelect: {
265 const auto value_type = context->tensors[node->inputs->data[1]].type;
266 return value_type == kTfLiteInt8;
267 }
268 case kTfLiteBuiltinAdd:
269 case kTfLiteBuiltinArgMax:
270 case kTfLiteBuiltinArgMin:
271 case kTfLiteBuiltinAveragePool2d:
272 case kTfLiteBuiltinBatchToSpaceNd:
273 case kTfLiteBuiltinConcatenation:
274 case kTfLiteBuiltinEqual:
275 case kTfLiteBuiltinExpandDims:
276 case kTfLiteBuiltinGather:
277 case kTfLiteBuiltinGreater:
278 case kTfLiteBuiltinGreaterEqual:
279 case kTfLiteBuiltinHardSwish:
280 case kTfLiteBuiltinL2Normalization:
281 case kTfLiteBuiltinLeakyRelu:
282 case kTfLiteBuiltinLess:
283 case kTfLiteBuiltinLessEqual:
284 case kTfLiteBuiltinLogistic:
285 case kTfLiteBuiltinMaximum:
286 case kTfLiteBuiltinMaxPool2d:
287 case kTfLiteBuiltinMean:
288 case kTfLiteBuiltinMinimum:
289 case kTfLiteBuiltinMul:
290 case kTfLiteBuiltinNotEqual:
291 case kTfLiteBuiltinPad:
292 case kTfLiteBuiltinPadv2:
293 case kTfLiteBuiltinPrelu:
294 case kTfLiteBuiltinReduceMax:
295 case kTfLiteBuiltinReduceMin:
296 case kTfLiteBuiltinRelu:
297 case kTfLiteBuiltinReluN1To1:
298 case kTfLiteBuiltinRelu6:
299 case kTfLiteBuiltinResizeBilinear:
300 case kTfLiteBuiltinResizeNearestNeighbor:
301 case kTfLiteBuiltinReshape:
302 case kTfLiteBuiltinSlice:
303 case kTfLiteBuiltinSoftmax:
304 case kTfLiteBuiltinSpaceToBatchNd:
305 case kTfLiteBuiltinSpaceToDepth:
306 case kTfLiteBuiltinDepthToSpace:
307 case kTfLiteBuiltinStridedSlice:
308 case kTfLiteBuiltinSub:
309 case kTfLiteBuiltinTanh:
310 case kTfLiteBuiltinTile:
311 case kTfLiteBuiltinTopkV2:
312 case kTfLiteBuiltinTranspose: {
313 return input_type == kTfLiteInt8;
314 }
315 default:
316 return false;
317 }
318 }
319
320 constexpr int kLstmFullKernelInputSize = 24;
321 // The 20 input version is deprecated and kept only to
322 // support old model. The latest version of the LSTM Full Kernel
323 // is the one with 24 inputs
324 constexpr int kLstmFullKernelNoOptionalParamsInputSize = 20;
325 constexpr int kLstmBasicKernelInputSize = 5;
326
isLstmBasicKernel(const TfLiteNode * node)327 inline bool isLstmBasicKernel(const TfLiteNode* node) {
328 return node->inputs->size == kLstmBasicKernelInputSize;
329 }
330
isLstmFullKernel(const TfLiteNode * node)331 inline bool isLstmFullKernel(const TfLiteNode* node) {
332 return node->inputs->size == kLstmFullKernelInputSize ||
333 node->inputs->size == kLstmFullKernelNoOptionalParamsInputSize;
334 }
335
IsHybridOperator(const TfLiteContext * context,int builtin_code,const TfLiteNode * node)336 bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
337 const TfLiteNode* node) {
338 switch (builtin_code) {
339 case kTfLiteBuiltinConv2d:
340 case kTfLiteBuiltinFullyConnected: {
341 const int input_id = node->inputs->data[0];
342 const int filter_id = node->inputs->data[1];
343 const TfLiteType input_type = context->tensors[input_id].type;
344 const TfLiteType filter_type = context->tensors[filter_id].type;
345 return IsFloat(input_type) && IsQuantized(filter_type);
346 }
347 case kTfLiteBuiltinLstm: {
348 const int input_id = node->inputs->data[0];
349 // Input #1 is optional so use #2 to determine if hybrid.
350 const int weights_id = node->inputs->data[2];
351 const TfLiteType input_type = context->tensors[input_id].type;
352 const TfLiteType weights_type = context->tensors[weights_id].type;
353 return isLstmFullKernel(node) && IsFloat(input_type) &&
354 IsQuantized(weights_type);
355 }
356 case kTfLiteBuiltinUnidirectionalSequenceLstm: {
357 const int input_id = node->inputs->data[0];
358 // Input #1 is optional so use #2 to determine if hybrid.
359 const int weights_id = node->inputs->data[2];
360 const TfLiteType input_type = context->tensors[input_id].type;
361 const TfLiteType weights_type = context->tensors[weights_id].type;
362 return IsFloat(input_type) && IsQuantized(weights_type);
363 }
364 case kTfLiteBuiltinBidirectionalSequenceLstm: {
365 const int input_id = node->inputs->data[0];
366 // Input #1 is optional so use #2 to determine if hybrid.
367 const int weights_id = node->inputs->data[2];
368 const TfLiteType input_type = context->tensors[input_id].type;
369 const TfLiteType weights_type = context->tensors[weights_id].type;
370 return IsFloat(input_type) && IsQuantized(weights_type);
371 }
372 case kTfLiteBuiltinUnidirectionalSequenceRnn: {
373 const int input_id = node->inputs->data[0];
374 const int weights_id = node->inputs->data[1];
375 const TfLiteType input_type = context->tensors[input_id].type;
376 const TfLiteType weights_type = context->tensors[weights_id].type;
377 return IsFloat(input_type) && IsQuantized(weights_type);
378 }
379 default:
380 return false;
381 }
382 }
383
HasUnspecifiedDimension(const TfLiteTensor * tensor)384 bool HasUnspecifiedDimension(const TfLiteTensor* tensor) {
385 if (tensor->dims_signature) {
386 for (int i : TfLiteIntArrayView(tensor->dims_signature)) {
387 if (i == -1) return true;
388 }
389 }
390 return false;
391 }
392
ConvertTensorTypeToNNType(const TfLiteTensor * tensor,TfLiteType ann_type_equivalent)393 ANeuralNetworksOperandType ConvertTensorTypeToNNType(
394 const TfLiteTensor* tensor, TfLiteType ann_type_equivalent) {
395 int32_t nn_type = 0;
396 float scale = 0.0f;
397 int32_t zero_point = 0;
398 switch (tensor->type) {
399 case kTfLiteFloat32:
400 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
401 break;
402 case kTfLiteUInt8:
403 nn_type = ann_type_equivalent == kTfLiteInt32
404 ? ANEURALNETWORKS_TENSOR_INT32
405 : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
406 scale = tensor->params.scale;
407 zero_point = tensor->params.zero_point;
408 if (scale == 0) {
409 // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
410 // with zero scale are not valid in NNAPI.
411 scale = 1;
412 }
413 break;
414 case kTfLiteInt8:
415 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
416 scale = tensor->params.scale;
417 zero_point = tensor->params.zero_point;
418 if (ann_type_equivalent == kTfLiteUInt8) {
419 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
420 zero_point += 128;
421 } else if (ann_type_equivalent == kTfLiteInt32) {
422 nn_type = ANEURALNETWORKS_TENSOR_INT32;
423 zero_point += 128;
424 }
425 if (scale == 0) {
426 // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
427 // with zero scale are not valid in NNAPI.
428 scale = 1;
429 }
430 break;
431 case kTfLiteInt32:
432 nn_type = ANEURALNETWORKS_TENSOR_INT32;
433 scale = tensor->params.scale;
434 zero_point = tensor->params.zero_point;
435 break;
436 case kTfLiteBool:
437 nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
438 break;
439 case kTfLiteInt16:
440 nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
441 scale = tensor->params.scale;
442 zero_point = tensor->params.zero_point;
443 break;
444 default:
445 break;
446 }
447 uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
448 uint32_t* tensor_dims = reinterpret_cast<uint32_t*>(tensor->dims->data);
449 static uint32_t scalar_rank = 1;
450 // treat scalar input as single cell tensor in NNAPI.
451 if (tensor_rank == 0) {
452 tensor_rank = scalar_rank;
453 tensor_dims = &scalar_rank;
454 }
455 ANeuralNetworksOperandType nn_operand_type{
456 .type = nn_type,
457 .dimensionCount = tensor_rank,
458 .dimensions = tensor_dims,
459 .scale = scale,
460 .zeroPoint = zero_point,
461 };
462 return nn_operand_type;
463 }
464
465 constexpr size_t kDefaultByteAlignmentForNNAPI = 16;
466
getNumPaddingBytes(size_t byte_size)467 static size_t getNumPaddingBytes(size_t byte_size) {
468 size_t num_padding_bytes = 0;
469 if (byte_size % kDefaultByteAlignmentForNNAPI) {
470 num_padding_bytes = kDefaultByteAlignmentForNNAPI -
471 (byte_size % kDefaultByteAlignmentForNNAPI);
472 }
473 return num_padding_bytes;
474 }
475
476 // Return NNAPI device handle with the provided null-terminated device name.
477 // Returns kTfLiteError in case of any NNAPI error and if no device with the
478 // given name can be found.
GetDeviceHandle(const NnApi * nnapi,TfLiteContext * context,const char * device_name_ptr,ANeuralNetworksDevice ** result,int * nnapi_errno)479 TfLiteStatus GetDeviceHandle(const NnApi* nnapi, TfLiteContext* context,
480 const char* device_name_ptr,
481 ANeuralNetworksDevice** result, int* nnapi_errno) {
482 if (!device_name_ptr) return kTfLiteError;
483 *result = nullptr;
484 std::string device_name(device_name_ptr);
485 uint32_t num_devices = 0;
486 nnapi->ANeuralNetworks_getDeviceCount(&num_devices);
487
488 for (uint32_t i = 0; i < num_devices; i++) {
489 ANeuralNetworksDevice* device = nullptr;
490 const char* buffer = nullptr;
491 RETURN_TFLITE_ERROR_IF_NN_ERROR(
492 context, nnapi->ANeuralNetworks_getDevice(i, &device),
493 "Searching for target device", nnapi_errno);
494
495 RETURN_TFLITE_ERROR_IF_NN_ERROR(
496 context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
497 "Searching for target device", nnapi_errno);
498
499 if (device_name == buffer) {
500 *result = device;
501 return kTfLiteOk;
502 }
503 }
504
505 context->ReportError(context,
506 "Could not find the specified NNAPI accelerator: %s. "
507 "Must be one of: {%s}.",
508 device_name_ptr,
509 nnapi::GetStringDeviceNamesList().c_str());
510 return kTfLiteError;
511 }
512
513 // Compute the hash of a TfLiteIntArray.
GetHash(const TfLiteIntArray * int_array,uint64_t combine_with=0)514 uint64_t GetHash(const TfLiteIntArray* int_array, uint64_t combine_with = 0) {
515 constexpr auto kHashConst = 0x9e3779b97f4a7800ULL;
516 uint64_t result = combine_with;
517 for (auto i : TfLiteIntArrayView(int_array)) {
518 result = result ^ (i + kHashConst + (result << 10) + (result >> 4));
519 }
520 return result;
521 }
522
HasZeroes(TfLiteIntArrayView array)523 bool HasZeroes(TfLiteIntArrayView array) {
524 for (auto value : array) {
525 if (value == 0) {
526 return true;
527 }
528 }
529 return false;
530 }
531
532 // Bit mask for tensor flags.
533 enum {
534 NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
535 NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
536 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
537 NN_TENSOR_FLAG_FORCE_PER_CHANNEL = 1U << 3,
538 };
539
540 // Returns the SDK level to target when delegating to the given devices.
541 // The SDK level is the max of the ones supported by the devices or
542 // the current Android SDK level if no device is present.
GetTargetSdkVersion(TfLiteContext * context,const NnApi * nnapi,const std::vector<ANeuralNetworksDevice * > & device_handles,int * target_sdk_version,int * nnapi_errno)543 TfLiteStatus GetTargetSdkVersion(
544 TfLiteContext* context, const NnApi* nnapi,
545 const std::vector<ANeuralNetworksDevice*>& device_handles,
546 int* target_sdk_version, int* nnapi_errno) {
547 *target_sdk_version = nnapi->android_sdk_version;
548 int64_t devices_sdk_version = -1;
549 for (const auto* device_handle : device_handles) {
550 int64_t curr_device_sdk_version;
551 RETURN_TFLITE_ERROR_IF_NN_ERROR(
552 context,
553 nnapi->ANeuralNetworksDevice_getFeatureLevel(device_handle,
554 &curr_device_sdk_version),
555 "Searching for target device", nnapi_errno);
556
557 devices_sdk_version =
558 std::max(curr_device_sdk_version, devices_sdk_version);
559 }
560
561 if ((devices_sdk_version > 0) &&
562 // This second check is necessary since if the nnapi-reference device is
563 // in the list of target devices the devices_sdk_version value will be
564 // 1000.
565 (devices_sdk_version < nnapi->android_sdk_version)) {
566 TFLITE_LOG(TFLITE_LOG_INFO,
567 "Changing Android NN SDK version %d to version "
568 "supported by target devices: %lld",
569 nnapi->android_sdk_version, devices_sdk_version);
570
571 *target_sdk_version = devices_sdk_version;
572 }
573
574 return kTfLiteOk;
575 }
576
577 // Returns true if this delegate is configured to use a specific set of devices.
578 // This will happen either if:
579 // - accelerator_name option has been specified
580 // - NNAPI CPU implementation has been explicitly disabled.
581 // If exclude_nnapi_reference is true this method will return false if the
582 // accelerator_name in the delegate options is equal to "nnapi-reference"
ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,const NnApi * nnapi,bool exclude_nnapi_reference=false)583 bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
584 const NnApi* nnapi,
585 bool exclude_nnapi_reference = false) {
586 const char* device_name_ptr = delegate_options.accelerator_name;
587 std::string nnapi_cpu("nnapi-reference");
588 bool has_selected_accelerator = device_name_ptr != nullptr;
589 if (exclude_nnapi_reference && has_selected_accelerator) {
590 if (nnapi_cpu == device_name_ptr) return false;
591 }
592 return (delegate_options.disallow_nnapi_cpu &&
593 nnapi->android_sdk_version >=
594 delegate::nnapi::kMinSdkVersionForNNAPI12) ||
595 has_selected_accelerator;
596 }
597
598 // Fills the given result vector with the list of devices the given delegate
599 // is referring to.
600 // There are three possible results:
601 // - an empty array (not the full list of available accelerators,
602 // for efficiency reasons) if no accelerator is chosen and the
603 // disallow_nnapi_cpu delegate option is false.
604 // - A single element array with the target processor, if an accelerator name
605 // is specified in the delegate options.
606 // - The full list of devices available on device less the nnapi reference
607 // implementation if the delegate option disallow_nnapi_cpu has been
608 // specified.
GetTargetDevices(TfLiteContext * context,TfLiteDelegate * delegate,const NnApi * nnapi,int * nnapi_errno,std::vector<ANeuralNetworksDevice * > * result)609 TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate,
610 const NnApi* nnapi, int* nnapi_errno,
611 std::vector<ANeuralNetworksDevice*>* result) {
612 if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) {
613 return kTfLiteError;
614 }
615
616 const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
617 const char* device_name_ptr = delegate_options.accelerator_name;
618
619 if (device_name_ptr != nullptr) {
620 // User specified an accelerator to use.
621 ANeuralNetworksDevice* nnapi_device = nullptr;
622 TF_LITE_ENSURE_STATUS(GetDeviceHandle(nnapi, context, device_name_ptr,
623 &nnapi_device, nnapi_errno));
624 result->push_back(nnapi_device);
625 } else if (delegate_options.disallow_nnapi_cpu) {
626 std::string nnapi_cpu("nnapi-reference");
627 uint32_t num_devices = 0;
628 nnapi->ANeuralNetworks_getDeviceCount(&num_devices);
629
630 for (uint32_t i = 0; i < num_devices; i++) {
631 ANeuralNetworksDevice* device = nullptr;
632 const char* buffer = nullptr;
633 RETURN_TFLITE_ERROR_IF_NN_ERROR(
634 context, nnapi->ANeuralNetworks_getDevice(i, &device),
635 "Getting list of available devices", nnapi_errno);
636 RETURN_TFLITE_ERROR_IF_NN_ERROR(
637 context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
638 "Getting list of available devices", nnapi_errno);
639 if (nnapi_cpu != buffer) {
640 result->push_back(device);
641 }
642 }
643 }
644
645 return kTfLiteOk;
646 }
647
648 } // namespace
649
650 namespace delegate {
651 namespace nnapi {
652
653 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
NNMemory(const NnApi * nnapi,const char * name,size_t size)654 NNMemory::NNMemory(const NnApi* nnapi, const char* name, size_t size) {
655 if (name && size > 0) {
656 nnapi_ = nnapi;
657 byte_size_ = size;
658 fd_ = nnapi_->ASharedMemory_create(name, size);
659 data_ptr_ = reinterpret_cast<uint8_t*>(
660 mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
661 nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
662 fd_, 0, &nn_memory_handle_);
663 }
664 }
665 #else
666 NNMemory::NNMemory(const NnApi* /*nnapi*/, const char* /*name*/,
667 size_t /*size*/)
668 : nnapi_(nullptr) {}
669 #endif
670
~NNMemory()671 NNMemory::~NNMemory() {
672 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
673 if (data_ptr_) {
674 munmap(data_ptr_, byte_size_);
675 }
676 if (nn_memory_handle_) {
677 nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
678 }
679 if (fd_ > 0) close(fd_);
680 #endif
681 }
682
683 class DequantizeMapping {
684 public:
DequantizedAnnIndex(int ann_index,TfLiteType type) const685 int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
686 for (const auto& element : mapping_) {
687 if (ann_index == std::get<0>(element) && type == std::get<1>(element)) {
688 return std::get<2>(element);
689 }
690 }
691 return -1;
692 }
693
Add(int ann_index,TfLiteType type,int dequantized_ann_index)694 void Add(int ann_index, TfLiteType type, int dequantized_ann_index) {
695 // This assumes it is not already mapped.
696 mapping_.emplace_back(ann_index, type, dequantized_ann_index);
697 }
698
699 private:
700 // Each tuple specifies the ANN (quantized) tensor index, the desired
701 // floating-point type and the matching ANN (dequantized) tensor index. This
702 // could use a map but instead std::vector is used to keep code size lower.
703 std::vector<std::tuple<int, TfLiteType, int>> mapping_;
704 };
705
706 // Abstract builder for building an op in the NN API graph. This handles
707 // the disparity between TFLite and NN API operand types. NN API has singular
708 // operands for both tensors and parameters, and TFLite separates the two.
709 class NNAPIOpBuilder {
710 public:
NNAPIOpBuilder(const NnApi * nnapi,TfLiteContext * context,OperandMapping * tensor_mapping,DequantizeMapping * dequantize_mapping,std::map<const MMAPAllocation *,ANeuralNetworksMemory * > * allocation_mapping,std::vector<int> * nnapi_to_tflite_op_mapping,ANeuralNetworksModel * nn_model,int * nnapi_errno,bool allow_dynamic_dimensions)711 NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
712 OperandMapping* tensor_mapping,
713 DequantizeMapping* dequantize_mapping,
714 std::map<const MMAPAllocation*, ANeuralNetworksMemory*>*
715 allocation_mapping,
716 std::vector<int>* nnapi_to_tflite_op_mapping,
717 ANeuralNetworksModel* nn_model, int* nnapi_errno,
718 bool allow_dynamic_dimensions)
719 : nnapi_(nnapi),
720 context_(context),
721 operand_mapping_(tensor_mapping),
722 dequantize_mapping_(dequantize_mapping),
723 allocation_memory_mapping_(allocation_mapping),
724 nnapi_to_tflite_op_mapping_(nnapi_to_tflite_op_mapping),
725 nn_model_(nn_model),
726 nnapi_errno_(nnapi_errno),
727 allow_dynamic_dimensions_(allow_dynamic_dimensions) {}
728
AddScalarBoolOperand(bool value)729 TfLiteStatus AddScalarBoolOperand(bool value) {
730 return AddScalarOperand<bool>(value, ANEURALNETWORKS_BOOL);
731 }
732
AddScalarInt32Operand(int32_t value)733 TfLiteStatus AddScalarInt32Operand(int32_t value) {
734 return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
735 }
736
AddScalarFloat32Operand(float value)737 TfLiteStatus AddScalarFloat32Operand(float value) {
738 return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
739 }
740
AddVectorInt32Operand(const int32_t * values,uint32_t num_values)741 TfLiteStatus AddVectorInt32Operand(const int32_t* values,
742 uint32_t num_values) {
743 return AddVectorOperand<int32_t>(values, num_values,
744 ANEURALNETWORKS_TENSOR_INT32,
745 /*scale=*/0.f, /*zero_point=*/0);
746 }
747
AddVectorInt32Operand(const int32_t * values,uint32_t num_values,float scale,int32_t zero_point)748 TfLiteStatus AddVectorInt32Operand(const int32_t* values, uint32_t num_values,
749 float scale, int32_t zero_point) {
750 return AddVectorOperand<int32_t>(
751 values, num_values, ANEURALNETWORKS_TENSOR_INT32, scale, zero_point);
752 }
753
AddVectorInt16Operand(const int16_t * values,uint32_t num_values)754 TfLiteStatus AddVectorInt16Operand(const int16_t* values,
755 uint32_t num_values) {
756 return AddVectorOperand<int16_t>(values, num_values,
757 ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
758 /*scale=*/1.f, /*zero_point=*/0);
759 }
760
AddVectorInt8Operand(const int8_t * values,uint32_t num_values)761 TfLiteStatus AddVectorInt8Operand(const int8_t* values, uint32_t num_values) {
762 return AddVectorOperand<int8_t>(values, num_values,
763 ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
764 /*scale=*/1.f, /*zero_point=*/0);
765 }
766
AddVectorFloat32Operand(const float * values,uint32_t num_values)767 TfLiteStatus AddVectorFloat32Operand(const float* values,
768 uint32_t num_values) {
769 return AddVectorOperand<float>(values, num_values,
770 ANEURALNETWORKS_TENSOR_FLOAT32);
771 }
772
AddPoolingParams(void * data)773 TfLiteStatus AddPoolingParams(void* data) {
774 auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
775 AddScalarInt32Operand(builtin->padding);
776 AddScalarInt32Operand(builtin->stride_width);
777 AddScalarInt32Operand(builtin->stride_height);
778 AddScalarInt32Operand(builtin->filter_width);
779 AddScalarInt32Operand(builtin->filter_height);
780 AddScalarInt32Operand(builtin->activation);
781 return kTfLiteOk;
782 }
783
AddTensorInput(int tensor_index,bool hybrid_op,int tensor_flags=0)784 TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op,
785 int tensor_flags = 0) {
786 return AddTensor(tensor_index, hybrid_op, &augmented_inputs_, tensor_flags);
787 }
788
AddTensorOutput(int tensor_index,int tensor_flags=0)789 TfLiteStatus AddTensorOutput(int tensor_index, int tensor_flags = 0) {
790 return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_,
791 tensor_flags);
792 }
793
AddAdditionalFloat32OutputTensor(uint32_t dimension_count)794 TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) {
795 std::vector<uint32_t> dims(dimension_count, 0);
796 return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr);
797 }
798
AddStateFloat32Tensor(int tensor_index,int * ann_tensor_index_out)799 TfLiteStatus AddStateFloat32Tensor(int tensor_index,
800 int* ann_tensor_index_out) {
801 TfLiteTensor* tensor = &context_->tensors[tensor_index];
802 return AddFloat32OutputTensor(
803 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
804 ann_tensor_index_out);
805 }
806
AddStateInt16Tensor(int tensor_index,int * ann_tensor_index_out)807 TfLiteStatus AddStateInt16Tensor(int tensor_index,
808 int* ann_tensor_index_out) {
809 TfLiteTensor* tensor = &context_->tensors[tensor_index];
810 return AddAdditionalOutputTensor(
811 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
812 ANEURALNETWORKS_TENSOR_QUANT16_SYMM, tensor->params.scale,
813 tensor->params.zero_point, ann_tensor_index_out);
814 }
815
AddStateInt8AsymTensor(int tensor_index,int * ann_tensor_index_out)816 TfLiteStatus AddStateInt8AsymTensor(int tensor_index,
817 int* ann_tensor_index_out) {
818 TfLiteTensor* tensor = &context_->tensors[tensor_index];
819 return AddAdditionalOutputTensor(
820 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
821 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, tensor->params.scale,
822 tensor->params.zero_point, ann_tensor_index_out);
823 }
824
825 // Add a constant tensor with a single element, intended for broadcast capable
826 // ops.
AddSingleValueConstantTensor(float value,bool is_quantized)827 TfLiteStatus AddSingleValueConstantTensor(float value, bool is_quantized) {
828 if (!is_quantized) {
829 return AddVectorFloat32Operand(&value, 1);
830 } else {
831 // in the case that we need to add a quantized tensor, set the value to
832 // 64, zero_point to be 0 and adjust scale accordingly.
833 const uint8_t quant8_value = 64;
834 return AddVectorOperand<uint8_t>(&quant8_value, 1,
835 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
836 value / quant8_value, 0);
837 }
838 }
839
840 // Calculate the scale and zero_point for 8-bit unsigned tensor, given float
841 // min and max. zero_point is clamped to [0, 255].
CalculateQuantizationParams(float min,float max,float * scale,int * zero_point)842 TfLiteStatus CalculateQuantizationParams(float min, float max, float* scale,
843 int* zero_point) {
844 if (max < min) return kTfLiteError;
845 *scale = (max - min) / 255.f;
846 if (min > 0.f) {
847 *zero_point = 0;
848 } else if (max < 0.f) {
849 *zero_point = 255;
850 } else {
851 *zero_point = (0.f - min) / (*scale);
852 }
853 return kTfLiteOk;
854 }
855
856 // Lower hardswish according to the following equation:
857 // hard_swish[x] = x (ReLU6(x + 3)) / 6 == x * (Relu_N1_to_1(x/3) * 3 + 3) / 6
858 // = 0.5x * Relu_N1_to_1(x/3) + 0.5x
TransformHardSwishIntoSupportedOps(int lite_input_index,int lite_output_index,bool need_int8_conversion,int lite_node_index)859 TfLiteStatus TransformHardSwishIntoSupportedOps(int lite_input_index,
860 int lite_output_index,
861 bool need_int8_conversion,
862 int lite_node_index) {
863 const TfLiteTensor& tensor = context_->tensors[lite_input_index];
864 float input_scale = tensor.params.scale;
865 int input_zero_point = tensor.params.zero_point;
866 float input_min = 0.f;
867 float input_max = 0.f;
868 int tensor_flags = 0;
869 if (need_int8_conversion) {
870 tensor_flags = tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION;
871 input_zero_point += 128;
872 }
873 bool is_quantized = false;
874 int nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
875 if (tensor.type == kTfLiteInt8 || tensor.type == kTfLiteUInt8) {
876 is_quantized = true;
877 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
878 input_min = (0 - input_zero_point) * input_scale;
879 input_max = (255 - input_zero_point) * input_scale;
880 }
881
882 // Stage1 : s1 = Relu1(x * 1/3)
883 float s1_output_min = 0.f;
884 float s1_output_max = 0.f;
885 int s1_out_ann_index = 0;
886 {
887 float s1_output_scale = 0.f;
888 int s1_output_zero_point = 0;
889 if (is_quantized) {
890 // clamp the output range to [-1, 1] if needed.
891 s1_output_min = input_min / 3.f < -1.f ? -1.f : input_min / 3.f;
892 s1_output_max = input_max / 3.f > 1.f ? 1.f : input_max / 3.f;
893 CalculateQuantizationParams(s1_output_min, s1_output_max,
894 &s1_output_scale, &s1_output_zero_point);
895 }
896 TF_LITE_ENSURE_OK(context_,
897 AddTensorInput(lite_input_index, false, tensor_flags));
898 const float value3f = 1.f / 3.f;
899 TF_LITE_ENSURE_OK(context_,
900 AddSingleValueConstantTensor(value3f, is_quantized));
901 TF_LITE_ENSURE_OK(context_,
902 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_RELU1));
903 TF_LITE_ENSURE_OK(
904 context_,
905 AddAdditionalOutputTensor(
906 tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
907 nn_type, s1_output_scale, s1_output_zero_point,
908 &s1_out_ann_index));
909 TF_LITE_ENSURE_OK(
910 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
911 }
912
913 // Stage2 : s2 = x / 2
914 float s2_output_min = input_min / 2.f;
915 float s2_output_max = input_max / 2.f;
916 int s2_out_ann_index = 0;
917 {
918 float s2_output_scale = input_scale / 2.0f;
919 int s2_output_zero_point = input_zero_point;
920 TF_LITE_ENSURE_OK(context_,
921 AddTensorInput(lite_input_index, false, tensor_flags));
922 const float value2f = 0.5f;
923 TF_LITE_ENSURE_OK(context_,
924 AddSingleValueConstantTensor(value2f, is_quantized));
925 TF_LITE_ENSURE_OK(context_,
926 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
927 TF_LITE_ENSURE_OK(
928 context_,
929 AddAdditionalOutputTensor(
930 tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
931 nn_type, s2_output_scale, s2_output_zero_point,
932 &s2_out_ann_index));
933 TF_LITE_ENSURE_OK(
934 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
935 }
936
937 // Stage 3 : s3 = s1 * s2
938 int s3_out_ann_index = 0;
939 {
940 augmented_inputs_.push_back(s1_out_ann_index);
941 augmented_inputs_.push_back(s2_out_ann_index);
942 TF_LITE_ENSURE_OK(context_,
943 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
944 float s3_output_scale = 0.f;
945 int s3_output_zero_point = 0;
946 if (is_quantized) {
947 // the min for stage 3 is always 0.0f.
948 float s3_output_min = 0.f;
949 // the max for stage 3 is max(s1_min * s2_min, s1_max * s3_max).
950 float s3_output_max =
951 s1_output_max * s2_output_max > s1_output_min * s2_output_min
952 ? s1_output_max * s2_output_max
953 : s1_output_min * s2_output_min;
954 CalculateQuantizationParams(s3_output_min, s3_output_max,
955 &s3_output_scale, &s3_output_zero_point);
956 }
957 TF_LITE_ENSURE_OK(
958 context_,
959 AddAdditionalOutputTensor(
960 tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
961 nn_type, s3_output_scale, s3_output_zero_point,
962 &s3_out_ann_index));
963 TF_LITE_ENSURE_OK(
964 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
965 }
966
967 // Stage 4: y = s3 + s2
968 {
969 augmented_inputs_.push_back(s2_out_ann_index);
970 augmented_inputs_.push_back(s3_out_ann_index);
971 TF_LITE_ENSURE_OK(context_,
972 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
973 TF_LITE_ENSURE_OK(context_,
974 AddTensorOutput(lite_output_index, tensor_flags));
975 TF_LITE_ENSURE_OK(
976 context_, FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index));
977 }
978
979 return kTfLiteOk;
980 }
981
982 // Adds the operation to the model and maps the operation to the originating
983 // TFLite one.
AddOperationToModel(ANeuralNetworksOperationType type,uint32_t input_count,const uint32_t * inputs,uint32_t output_count,const uint32_t * outputs,int lite_node_index)984 TfLiteStatus AddOperationToModel(ANeuralNetworksOperationType type,
985 uint32_t input_count, const uint32_t* inputs,
986 uint32_t output_count,
987 const uint32_t* outputs,
988 int lite_node_index) {
989 RETURN_TFLITE_ERROR_IF_NN_ERROR(
990 context_,
991 nnapi_->ANeuralNetworksModel_addOperation(
992 nn_model_, type, input_count, inputs, output_count, outputs),
993 "adding operation", nnapi_errno_);
994 nnapi_to_tflite_op_mapping_->push_back(lite_node_index);
995 return kTfLiteOk;
996 }
997
998 // Adds a Dequantize operator and replaces the input tensor index with the
999 // dequantized version. If the dequantized version of the operator already
1000 // exists then it is not added again.
AddDequantize(int nn_input_index,int lite_tensor_index,TfLiteType dequantized_type,int lite_node_index)1001 TfLiteStatus AddDequantize(int nn_input_index, int lite_tensor_index,
1002 TfLiteType dequantized_type, int lite_node_index) {
1003 const int ann_index =
1004 operand_mapping_->lite_index_to_ann(lite_tensor_index);
1005 int dequantized_ann_index =
1006 dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type);
1007
1008 if (dequantized_ann_index == -1) {
1009 // The dequantized version does not exist yet, it has to be added: a new
1010 // Dequantize operation is added, yielding a new tensor.
1011 const TfLiteTensor& tensor = context_->tensors[lite_tensor_index];
1012 ANeuralNetworksOperandType operand_type{
1013 ANEURALNETWORKS_TENSOR_FLOAT32,
1014 static_cast<uint32_t>(tensor.dims->size),
1015 reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0};
1016 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1017 context_,
1018 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1019 "adding operand", nnapi_errno_);
1020 dequantized_ann_index = operand_mapping_->add_new_non_tensor_operand();
1021
1022 // Add Dequantize operation.
1023 const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)};
1024 const uint32_t dequantize_output[1] = {
1025 static_cast<uint32_t>(dequantized_ann_index)};
1026 TF_LITE_ENSURE_OK(
1027 context_, AddOperationToModel(ANEURALNETWORKS_DEQUANTIZE,
1028 /*input_count=*/1, dequantize_input,
1029 /*output_count=*/1, dequantize_output,
1030 lite_node_index));
1031 dequantize_mapping_->Add(ann_index, dequantized_type,
1032 dequantized_ann_index);
1033 }
1034
1035 // The input for the original operation is modified so that the operation
1036 // now uses the dequantized tensor as input.
1037 augmented_inputs_[nn_input_index] = dequantized_ann_index;
1038
1039 return kTfLiteOk;
1040 }
1041
1042 // Finish emitting the op (of type `type`) into the NN API.
FinalizeAddOperation(ANeuralNetworksOperationType type,int lite_node_index)1043 TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type,
1044 int lite_node_index) {
1045 // Actually add a NN API operation
1046 TF_LITE_ENSURE_OK(context_,
1047 AddOperationToModel(
1048 type, static_cast<uint32_t>(augmented_inputs_.size()),
1049 augmented_inputs_.data(),
1050 static_cast<uint32_t>(augmented_outputs_.size()),
1051 augmented_outputs_.data(), lite_node_index));
1052 augmented_inputs_.clear();
1053 augmented_outputs_.clear();
1054 return kTfLiteOk;
1055 }
1056
AddSingleValueTensorAsScalarOperand(int tensor_index,int nn_type)1057 TfLiteStatus AddSingleValueTensorAsScalarOperand(int tensor_index,
1058 int nn_type) {
1059 const TfLiteTensor* tensor = &context_->tensors[tensor_index];
1060 TF_LITE_ENSURE_EQ(context_, NumElements(tensor), 1);
1061
1062 ANeuralNetworksOperandType operand_type{.type = nn_type};
1063 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1064 context_,
1065 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1066 "adding operand", tensor, nnapi_errno_);
1067 int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
1068 if (ann_tensor_index != -1) {
1069 augmented_inputs_.push_back(ann_tensor_index);
1070 return kTfLiteOk;
1071 }
1072 // Allocate a new tensor index
1073 ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
1074 augmented_inputs_.push_back(ann_tensor_index);
1075
1076 const TfLiteType tensor_type = tensor->type;
1077 TfLiteType nn_type_equivalent;
1078 TF_LITE_ENSURE_OK(context_, GetEquivalentToANNType(context_, nn_type,
1079 &nn_type_equivalent));
1080 if (tensor_type != nn_type_equivalent) {
1081 operand_mapping_->add_type_conversion(tensor_index, nn_type_equivalent);
1082 }
1083 return kTfLiteOk;
1084 }
1085
1086 template <typename T>
AddNewInputConstantTensor(int32_t nn_type,TfLiteType type,const TfLiteIntArray * dims,const std::vector<T> & tensor_value,const TfLiteQuantizationParams & quant_params,int * tensor_index)1087 TfLiteStatus AddNewInputConstantTensor(
1088 int32_t nn_type, TfLiteType type, const TfLiteIntArray* dims,
1089 const std::vector<T>& tensor_value,
1090 const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1091 TF_LITE_ENSURE_OK(context_,
1092 context_->AddTensors(context_, 1, tensor_index));
1093
1094 TfLiteTensor* new_tensor = &context_->tensors[*tensor_index];
1095 new_tensor->type = type;
1096 new_tensor->allocation_type = kTfLiteDynamic;
1097 new_tensor->params = quant_params;
1098
1099 // Not removing the new tensor in case of resizing errors since it will
1100 // be cleared by the context
1101 TF_LITE_ENSURE_OK(
1102 context_,
1103 context_->ResizeTensor(
1104 context_, new_tensor,
1105 // Resize Tensor takes ownership of the dims array passed as param
1106 TfLiteIntArrayCopy(dims)));
1107
1108 memcpy(new_tensor->data.raw,
1109 reinterpret_cast<const char*>(tensor_value.data()),
1110 tensor_value.size() * sizeof(T));
1111
1112 const uint32_t tensor_rank = static_cast<uint32_t>(dims->size);
1113 const uint32_t* tensor_dims = reinterpret_cast<const uint32_t*>(dims->data);
1114 ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1115 quant_params.scale,
1116 quant_params.zero_point};
1117
1118 const int ann_tensor_index =
1119 operand_mapping_->add_delegate_generated_input_ann_tensors_operand();
1120
1121 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1122 context_,
1123 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1124 "adding operand", nnapi_errno_);
1125
1126 augmented_inputs_.push_back(ann_tensor_index);
1127
1128 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1129 context_,
1130 nnapi_->ANeuralNetworksModel_setOperandValue(
1131 nn_model_, ann_tensor_index, new_tensor->data.raw,
1132 new_tensor->bytes),
1133 "setting new operand value", nnapi_errno_);
1134
1135 return kTfLiteOk;
1136 }
1137
1138 template <typename T>
AddNewInputConstantTensor(int32_t nn_type,TfLiteType type,std::initializer_list<int> dims,const std::vector<T> & tensor_value,const TfLiteQuantizationParams & quant_params,int * tensor_index)1139 TfLiteStatus AddNewInputConstantTensor(
1140 int32_t nn_type, TfLiteType type, std::initializer_list<int> dims,
1141 const std::vector<T>& tensor_value,
1142 const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1143 TfLiteIntArray* dim_array = TfLiteIntArrayCreate(dims.size());
1144 dim_array->size = dims.size();
1145 std::copy(dims.begin(), dims.end(), dim_array->data);
1146
1147 const auto result = AddNewInputConstantTensor(
1148 nn_type, type, dim_array, tensor_value, quant_params, tensor_index);
1149 TfLiteIntArrayFree(dim_array);
1150 return result;
1151 }
1152
1153 private:
1154 // Returns a TF Lite type which has the same memory representation as a
1155 // provided NN API type.
GetEquivalentToANNType(TfLiteContext * context,int nn_type,TfLiteType * type)1156 TfLiteStatus GetEquivalentToANNType(TfLiteContext* context, int nn_type,
1157 TfLiteType* type) {
1158 switch (nn_type) {
1159 case ANEURALNETWORKS_INT32:
1160 *type = kTfLiteInt32;
1161 return kTfLiteOk;
1162 case ANEURALNETWORKS_FLOAT32:
1163 *type = kTfLiteFloat32;
1164 return kTfLiteOk;
1165 default:
1166 context->ReportError(context,
1167 "NN API Delegate: Can't get an equivalent TF Lite "
1168 "type for provided NN API type: %d.\n",
1169 nn_type);
1170 return kTfLiteError;
1171 }
1172 }
1173
1174 template <typename T>
AddScalarOperand(T value,int32_t nn_type)1175 TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
1176 ANeuralNetworksOperandType operand_type{.type = nn_type};
1177 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1178 context_,
1179 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1180 "adding operand", nnapi_errno_);
1181 const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1182 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1183 context_,
1184 nnapi_->ANeuralNetworksModel_setOperandValue(nn_model_, ann_index,
1185 &value, sizeof(T)),
1186 "setting new operand value", nnapi_errno_);
1187 augmented_inputs_.push_back(ann_index);
1188 return kTfLiteOk;
1189 }
1190
1191 template <typename T>
AddVectorOperand(const T * values,uint32_t num_values,int32_t nn_type,float scale,int32_t zero_point)1192 TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1193 int32_t nn_type, float scale,
1194 int32_t zero_point) {
1195 ANeuralNetworksOperandType operand_type{.type = nn_type,
1196 .dimensionCount = 1,
1197 .dimensions = &num_values,
1198 .scale = scale,
1199 .zeroPoint = zero_point};
1200
1201 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1202 context_,
1203 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1204 "adding operand", nnapi_errno_);
1205
1206 const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1207 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1208 context_,
1209 nnapi_->ANeuralNetworksModel_setOperandValue(
1210 nn_model_, ann_index, values, sizeof(T) * num_values),
1211 "settings new operand value", nnapi_errno_);
1212 augmented_inputs_.push_back(ann_index);
1213 return kTfLiteOk;
1214 }
1215
1216 template <typename T>
AddVectorOperand(const T * values,uint32_t num_values,int32_t nn_type)1217 TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1218 int32_t nn_type) {
1219 return AddVectorOperand(values, num_values, nn_type, /*scale=*/0.f,
1220 /*zero_point=*/0);
1221 }
1222
AddFloat32OutputTensor(uint32_t dimension_count,const uint32_t * dimension_data,int * ann_index_out)1223 TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count,
1224 const uint32_t* dimension_data,
1225 int* ann_index_out) {
1226 return AddAdditionalOutputTensor(
1227 dimension_count, dimension_data, ANEURALNETWORKS_TENSOR_FLOAT32,
1228 /*scale=*/0.f, /*zero_point=*/0, ann_index_out);
1229 }
1230
AddAdditionalOutputTensor(uint32_t dimension_count,const uint32_t * dimension_data,int32_t nn_type,float scale,int32_t zero_point,int * ann_index_out)1231 TfLiteStatus AddAdditionalOutputTensor(uint32_t dimension_count,
1232 const uint32_t* dimension_data,
1233 int32_t nn_type, float scale,
1234 int32_t zero_point,
1235 int* ann_index_out) {
1236 ANeuralNetworksOperandType operand_type{
1237 .type = nn_type,
1238 .dimensionCount = dimension_count,
1239 .dimensions = dimension_data,
1240 .scale = scale,
1241 .zeroPoint = zero_point,
1242 };
1243 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1244 context_,
1245 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1246 "adding operand", nnapi_errno_);
1247 const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1248 augmented_outputs_.push_back(ann_index);
1249 if (ann_index_out) *ann_index_out = ann_index;
1250 return kTfLiteOk;
1251 }
1252
1253 // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`.
1254 // This returns the NN API tensor index corresponding to the created tensor.
1255 // If another caller previously created a NN API tensor for `tensor_index`
1256 // then the existing one is returned.
AddTensor(int tensor_index,bool hybrid_op,std::vector<uint32_t> * indices,int tensor_flags=0)1257 TfLiteStatus AddTensor(int tensor_index, bool hybrid_op,
1258 std::vector<uint32_t>* indices, int tensor_flags = 0) {
1259 const bool scalar_as_tensor =
1260 tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
1261 const bool need_int8_conversion =
1262 tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
1263 const bool use_int8_asymm_signed =
1264 tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
1265 const bool force_per_channel =
1266 tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL;
1267 int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
1268 if (ann_tensor_index != -1) {
1269 indices->push_back(ann_tensor_index);
1270 return kTfLiteOk;
1271 }
1272 // Allocate a new tensor index
1273 ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
1274
1275 // Parameters needed for new type.
1276 int32_t nn_type = 0;
1277 float scale = 0.0f;
1278 int32_t zeroPoint = 0;
1279 ANeuralNetworksSymmPerChannelQuantParams ann_perchannel_params;
1280 TfLiteTensor* tensor = &context_->tensors[tensor_index];
1281 TfLiteType tensor_type = tensor->type;
1282 if (hybrid_op && (tensor_type == kTfLiteUInt8)) {
1283 // For legacy reason, UINT8 weights in hybrid operators are actually INT8
1284 // values and should be interpreted as such.
1285 tensor_type = kTfLiteInt8;
1286 }
1287 switch (tensor_type) {
1288 case kTfLiteNoType:
1289 // Tensors added during initialization of Ops don't have a type yet and
1290 // should not be registered with the NNAPI.
1291 indices->push_back(-1);
1292 return kTfLiteOk;
1293 case kTfLiteFloat32:
1294 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1295 break;
1296 case kTfLiteUInt8:
1297 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1298 scale = tensor->params.scale;
1299 zeroPoint = tensor->params.zero_point;
1300 if (scale == 0) {
1301 // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
1302 // NNAPI.
1303 scale = 1;
1304 }
1305 break;
1306 case kTfLiteInt8:
1307 // If explicit int8 conversion is needed, we still need
1308 // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
1309 if (use_int8_asymm_signed) {
1310 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1311 } else if (need_int8_conversion) {
1312 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1313 } else {
1314 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
1315 }
1316 scale = tensor->params.scale;
1317 zeroPoint = tensor->params.zero_point;
1318 if (tensor->quantization.type == kTfLiteAffineQuantization) {
1319 TfLiteAffineQuantization* quantization_params =
1320 static_cast<TfLiteAffineQuantization*>(
1321 tensor->quantization.params);
1322 if (quantization_params->scale->size > 1 || force_per_channel) {
1323 // Set up per-channel quantization.
1324 ann_perchannel_params = {
1325 .channelDim = static_cast<uint32_t>(
1326 quantization_params->quantized_dimension),
1327 .scaleCount =
1328 static_cast<uint32_t>(quantization_params->scale->size),
1329 .scales = quantization_params->scale->data,
1330 };
1331 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL;
1332 scale = 0.0f;
1333 zeroPoint = 0;
1334 } else if (quantization_params->scale->size == 1) {
1335 scale = quantization_params->scale->data[0];
1336 zeroPoint = quantization_params->zero_point->data[0];
1337 }
1338 }
1339 if (nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1340 if (need_int8_conversion) {
1341 zeroPoint += 128;
1342 operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
1343 }
1344 if (scale == 0) {
1345 // QUANT8 tensors with zero scale are not valid in NNAPI.
1346 scale = 1;
1347 }
1348 }
1349 break;
1350 case kTfLiteInt32:
1351 nn_type = ANEURALNETWORKS_TENSOR_INT32;
1352 scale = tensor->params.scale;
1353 zeroPoint = tensor->params.zero_point;
1354 break;
1355 case kTfLiteBool:
1356 nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
1357 break;
1358 case kTfLiteInt16:
1359 nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
1360 scale = tensor->params.scale;
1361 zeroPoint = tensor->params.zero_point;
1362 break;
1363 default:
1364 context_->ReportError(
1365 context_, "Failed to add NN API tensor: type %s is not supported.",
1366 TfLiteTypeGetName(tensor_type));
1367 return kTfLiteError;
1368 }
1369 bool has_unspecified_dimensions = HasUnspecifiedDimension(tensor);
1370 uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
1371 std::vector<uint32_t> dims_unspecified(tensor_rank, 0);
1372 if (has_unspecified_dimensions) {
1373 for (int i = 0; i < tensor->dims_signature->size; i++) {
1374 dims_unspecified[i] = tensor->dims_signature->data[i] == -1
1375 ? 0
1376 : tensor->dims_signature->data[i];
1377 }
1378 }
1379 uint32_t* tensor_dims =
1380 has_unspecified_dimensions && allow_dynamic_dimensions_
1381 ? dims_unspecified.data()
1382 : reinterpret_cast<uint32_t*>(tensor->dims->data);
1383 if (scalar_as_tensor && tensor_rank == 0) {
1384 // Use rank 1, shape {1} operand for TFLite scalar tensors.
1385 tensor_rank = 1;
1386 tensor_dims = &tensor_rank;
1387 }
1388 if (tensor_rank == 0) {
1389 // if the tensor_rank is 0, the dimension ptr must be nullptr.
1390 tensor_dims = nullptr;
1391 }
1392
1393 ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1394 scale, zeroPoint};
1395 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1396 context_,
1397 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1398 "adding operand", tensor, nnapi_errno_);
1399
1400 if (nn_type == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1401 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1402 context_,
1403 nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
1404 nn_model_, ann_tensor_index, &ann_perchannel_params),
1405 "setting new operand per channel quantization params", tensor,
1406 nnapi_errno_);
1407 }
1408 if (tensor->allocation_type == kTfLiteMmapRo) {
1409 if (IsQuantized(tensor_type) && need_int8_conversion &&
1410 nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1411 // We need to add a tensor and convert the weights into uint8.
1412 // Currently this is only needed for fully_connected. The new_tensor is
1413 // needed for lifetime management for the converted weights.
1414 int new_tensor_index = -1;
1415 TF_LITE_ENSURE_OK(context_,
1416 context_->AddTensors(context_, 1, &new_tensor_index));
1417 TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1418 new_tensor->type = kTfLiteUInt8;
1419 new_tensor->allocation_type = kTfLiteDynamic;
1420 new_tensor->params.scale = scale;
1421 new_tensor->params.zero_point = zeroPoint;
1422 // Not removing the new tensor in case of resizing errors since it will
1423 // be cleared by the context
1424 TF_LITE_ENSURE_OK(
1425 context_, context_->ResizeTensor(context_, new_tensor,
1426 // Resize Tensor takes ownership of
1427 // the dims array passed as param
1428 TfLiteIntArrayCopy(tensor->dims)));
1429 // Convert the int8 value into corresponding uint8 value;
1430 const auto num_elements = NumElements(tensor);
1431 for (int i = 0; i < num_elements; ++i) {
1432 new_tensor->data.uint8[i] = static_cast<const uint8_t>(
1433 static_cast<int32_t>(tensor->data.int8[i]) + 128);
1434 }
1435 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1436 context_,
1437 nnapi_->ANeuralNetworksModel_setOperandValue(
1438 nn_model_, ann_tensor_index, new_tensor->data.raw,
1439 new_tensor->bytes),
1440 "setting new operand value", tensor, nnapi_errno_);
1441 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
1442 } else if (tensor->allocation &&
1443 static_cast<const Allocation*>(tensor->allocation)->type() ==
1444 Allocation::Type::kMMap) {
1445 const MMAPAllocation* mmap_alloc =
1446 static_cast<const MMAPAllocation*>(tensor->allocation);
1447 if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
1448 ANeuralNetworksMemory* ann_memory_handle = nullptr;
1449 nnapi_->ANeuralNetworksMemory_createFromFd(
1450 mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0,
1451 &ann_memory_handle);
1452 allocation_memory_mapping_->insert(
1453 std::make_pair(mmap_alloc, ann_memory_handle));
1454 }
1455 ANeuralNetworksMemory* ann_memory_handle =
1456 allocation_memory_mapping_->at(mmap_alloc);
1457 // Compute the offset to the base pointer of the MMAPAllocation.
1458 auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) -
1459 reinterpret_cast<const uint8_t*>(mmap_alloc->base());
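// Passing an offset into the shared memory region lets NNAPI read the
// constant data directly from the mapped model file instead of copying it.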
1460 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1461 context_,
1462 nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
1463 nn_model_, ann_tensor_index, ann_memory_handle, offset,
1464 tensor->bytes),
1465 "setting new operand value from memory", tensor, nnapi_errno_);
1466 #endif
1467 } else {
1468 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1469 context_,
1470 nnapi_->ANeuralNetworksModel_setOperandValue(
1471 nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes),
1472 "setting new operand value", tensor, nnapi_errno_);
1473 }
1474 }
1475 indices->push_back(ann_tensor_index);
1476 return kTfLiteOk;
1477 }
1478
1479 // Access to NNAPI.
1480 const NnApi* const nnapi_;
1481
1482 // TfLiteContext for error handling.
1483 TfLiteContext* const context_;
1484
1485 // Tracks relationship between indices.
1486 OperandMapping* const operand_mapping_;
1487
1488 // Keeps mapping of ANN quantized tensor and float data type to equivalent
1489 // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map
1490 // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert
1491 // tensor #4 to a FLOAT32 tensor.
1492 DequantizeMapping* const dequantize_mapping_;
1493
1494 std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const
1495 allocation_memory_mapping_;
1496
1497 // Tracks for every operation in the NNAPI model the source TfLite model
1498 // node index.
1499 std::vector<int>* const nnapi_to_tflite_op_mapping_;
1500
1501 // The NNAPI model.
1502 ANeuralNetworksModel* const nn_model_;
1503
1504 // Inputs and outputs for the current op. These are augmented in the sense
1505 // that NN API uses operands for all arguments, not just tensors, unlike
1506 // TensorFlow Lite.
1507 std::vector<uint32_t> augmented_inputs_;
1508 std::vector<uint32_t> augmented_outputs_;
1509
1510 // Return status code of the latest NNAPI call.
1511 int* nnapi_errno_;
1512
1513 // Whether to allow dynamic batch size without re-compilation.
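// (Typically populated from the delegate options when this builder is
// constructed.)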
1514 bool allow_dynamic_dimensions_;
1515 };
1516
1517 namespace {
1518 struct OpValidationContext {
1519 bool is_valid;
1520 std::vector<NNAPIValidationFailure>* validation_failures;
1521 };
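// Note: the helpers below record detailed failure messages only when built
// with NNAPI_VERBOSE_VALIDATION defined; otherwise they just clear is_valid.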
1522
1523 #define EXPECT_INPUT_TYPE_IN(actual_type, ...) \
1524 ExpectTypeIn(actual_type, {__VA_ARGS__}, \
1525 NNAPIValidationFailureType::kUnsupportedInputType, \
1526 "Input type not in expected list " #__VA_ARGS__, &val_ctx)
1527
1528 inline void AddValidationFailure(NNAPIValidationFailureType failure_type,
1529 const char* message,
1530 OpValidationContext* val_ctx) {
1531 val_ctx->is_valid = false;
1532
1533 #ifdef NNAPI_VERBOSE_VALIDATION
1534 if (val_ctx->validation_failures) {
1535 val_ctx->validation_failures->push_back({failure_type, message});
1536 }
1537 #endif
1538 }
1539
1540 template <typename... Args>
1541 inline void AddValidationFailureFmt(OpValidationContext* val_ctx,
1542 NNAPIValidationFailureType failure_type,
1543 const char* message_fmt, Args... args) {
1544 val_ctx->is_valid = false;
1545 #ifdef NNAPI_VERBOSE_VALIDATION
1546 if (val_ctx->validation_failures) {
1547 size_t req_buf_size = snprintf(nullptr, 0, message_fmt, args...) + 1;
1548 std::unique_ptr<char[]> tmp_buf(new char[req_buf_size]);
1549 snprintf(tmp_buf.get(), req_buf_size, message_fmt, args...);
1550
1551 val_ctx->validation_failures->push_back({failure_type, tmp_buf.get()});
1552 }
1553 #endif
1554 }
1555
1556 inline bool Expect(bool condition, NNAPIValidationFailureType failure_type,
1557 const char* message, OpValidationContext* val_ctx) {
1558 if (!condition) {
1559 AddValidationFailure(failure_type, message, val_ctx);
1560 return false;
1561 }
1562 return true;
1563 }
1564
1565 template <typename... Args>
1566 inline bool ExpectFmt(bool condition, OpValidationContext* val_ctx,
1567 NNAPIValidationFailureType failure_type,
1568 const char* message_fmt, Args... args) {
1569 if (!condition) {
1570 AddValidationFailureFmt(val_ctx, failure_type, message_fmt, args...);
1571 return false;
1572 }
1573 return true;
1574 }
1575
1576 inline bool ExpectTypeIn(TfLiteType actual_type,
1577 std::initializer_list<TfLiteType> allowed_types,
1578 NNAPIValidationFailureType failure_type,
1579 const char* msg, OpValidationContext* val_ctx) {
1580 return Expect(std::find(allowed_types.begin(), allowed_types.end(),
1581 actual_type) != allowed_types.end(),
1582 failure_type, msg, val_ctx);
1583 }
1584
1585 inline bool ExpectMinAndroidSdkVersion(int curr_version, int min_version,
1586 OpValidationContext* val_ctx) {
1587 return ExpectFmt(curr_version >= min_version, val_ctx,
1588 NNAPIValidationFailureType::kUnsupportedAndroidVersion,
1589 "Android sdk version less than %d", min_version);
1590 }
1591
1592 inline bool ExpectMaxOpVersion(int curr_version, int max_version,
1593 OpValidationContext* val_ctx) {
1594 return ExpectFmt(curr_version <= max_version, val_ctx,
1595 NNAPIValidationFailureType::kUnsupportedOperatorVersion,
1596 "OP Version higher than %d", max_version);
1597 }
1598
1599 inline bool ExpectOpVersion(int curr_version, int max_version,
1600 OpValidationContext* val_ctx) {
1601 return ExpectFmt(curr_version <= max_version, val_ctx,
1602 NNAPIValidationFailureType::kUnsupportedOperatorVersion,
1603 "OP Version different from %d", max_version);
1604 }
1605
1606 inline bool ExpectIsFloatOperator(const TfLiteContext* context,
1607 const TfLiteNode* node,
1608 OpValidationContext* val_ctx) {
1609 const auto input_type = context->tensors[node->inputs->data[0]].type;
1610 return Expect(IsFloat(input_type),
1611 NNAPIValidationFailureType::kUnsupportedInputType,
1612 "Input should be Float", val_ctx);
1613 }
1614
1615 bool ExpectIsFloatOrUint8Operator(const TfLiteContext* context,
1616 const TfLiteNode* node,
1617 OpValidationContext* val_ctx) {
1618 const auto input_type = context->tensors[node->inputs->data[0]].type;
1619 return Expect(IsFloatOrUInt8(input_type),
1620 NNAPIValidationFailureType::kUnsupportedInputType,
1621 "Input should be Float or UINT8", val_ctx);
1622 }
1623
1624 bool ExpectIsFloatOrQuant8Operator(const TfLiteContext* context,
1625 const TfLiteNode* node,
1626 OpValidationContext* val_ctx) {
1627 const auto input_type = context->tensors[node->inputs->data[0]].type;
1628 return Expect(IsFloatOrQuantized(input_type),
1629 NNAPIValidationFailureType::kUnsupportedInputType,
1630 "Input should be Float or Quant8", val_ctx);
1631 }
1632
1633 bool ExpectIsFloatOrInt32Operator(const TfLiteContext* context,
1634 const TfLiteNode* node,
1635 OpValidationContext* val_ctx) {
1636 const auto input_type = context->tensors[node->inputs->data[0]].type;
1637 return Expect(IsFloatOrInt32(input_type),
1638 NNAPIValidationFailureType::kUnsupportedInputType,
1639 "Input should be Float or Int32", val_ctx);
1640 }
1641
1642 bool ExpectIsFloatQuant8OrInt32Operator(const TfLiteContext* context,
1643 const TfLiteNode* node,
1644 OpValidationContext* val_ctx) {
1645 const auto input_type = context->tensors[node->inputs->data[0]].type;
1646 return Expect(IsFloatQuantizedOrInt32(input_type),
1647 NNAPIValidationFailureType::kUnsupportedInputType,
1648 "Input should be Float, Quant8, or Int32", val_ctx);
1649 }
1650
1651 // When using NN API version 1.0 or 1.1, the condition below must be true for
1653 // quantized versions of the following ops:
1654 // * CONV_2D
1655 // * DEPTHWISE_CONV_2D
1656 // * FULLY_CONNECTED (where filter actually stands for weights)
1657 // The condition is relaxed and no longer required since version 1.2.
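// For example, with input_scale = 0.5 and filter_scale = 0.25, the op is only
// accepted on NNAPI 1.0/1.1 if output_scale > 0.125.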
1658 bool ExpectIsRestrictedScalesCompliant(const TfLiteContext* context,
1659 const TfLiteNode* node,
1660 OpValidationContext* val_ctx) {
1661 const int input_id = node->inputs->data[0];
1662 const int filter_id = node->inputs->data[1];
1663 const int output_id = node->outputs->data[0];
1664 const float input_scale = context->tensors[input_id].params.scale;
1665 const float filter_scale = context->tensors[filter_id].params.scale;
1666 const float output_scale = context->tensors[output_id].params.scale;
1667 return Expect(input_scale * filter_scale < output_scale,
1668 NNAPIValidationFailureType::kNotRestrictedScaleCompliant,
1669 "When using NN API version 1.0 or 1.1, input_scale * "
1670 "filter_scale < output_scale:",
1671 val_ctx);
1672 }
1673
1674 } // namespace
1675
1676 // Returns true if the node can be mapped to an NNAPI operation, i.e. it is
1677 // supported by this delegate for the given operator version and Android SDK
1678 // version. Validation failures are optionally recorded in |map_failures|.
1679 bool NNAPIDelegateKernel::Validate(
1680 const TfLiteContext* context, int builtin_code, int version,
1681 int android_sdk_version, const TfLiteNode* node,
1682 bool is_accelerator_specified,
1683 std::vector<NNAPIValidationFailure>* map_failures) {
1684 OpValidationContext val_ctx{true, map_failures};
1685 switch (builtin_code) {
1686 case kTfLiteBuiltinAdd: {
1687 ExpectMaxOpVersion(version, 2, &val_ctx);
1688 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
1689 ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
1690 if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
1691 Expect(reinterpret_cast<TfLiteAddParams*>(node->builtin_data)
1692 ->activation == kTfLiteActNone,
1693 NNAPIValidationFailureType::kNoActivationExpected,
1694 "No activation function supported", &val_ctx);
1695 }
1696 } else {
1697 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1698 }
1699 } break;
1700 case kTfLiteBuiltinArgMax:
1701 case kTfLiteBuiltinArgMin: {
1702 ExpectMaxOpVersion(version, 2, &val_ctx);
1703 // Those operators were introduced in NNAPI 1.2.
1704 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
1705 &val_ctx);
1706 const TfLiteType input_type =
1707 context->tensors[node->inputs->data[0]].type;
1708 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
1709 kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
1710
1711 const auto& axis_tensor = context->tensors[node->inputs->data[1]];
1712 if (axis_tensor.type == kTfLiteInt64) {
1713 Expect(
1714 axis_tensor.allocation_type == kTfLiteMmapRo &&
1715 *axis_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
1716 *axis_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
1717 NNAPIValidationFailureType::kUnsupportedInputType,
1718 "NNAPI only supports axis as int32. If the axis type is int64 and "
1719 "constant we can convert it to int32 if the value isn't too "
1720 "large.",
1721 &val_ctx);
1722 } else {
1723 Expect(axis_tensor.type == kTfLiteInt32,
1724 NNAPIValidationFailureType::kUnsupportedInputType,
1725 "Axis should be Int32", &val_ctx);
1726 }
1727 if (builtin_code == kTfLiteBuiltinArgMax) {
1728 auto builtin =
1729 reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data);
1730 Expect(builtin->output_type == kTfLiteInt32,
1731 NNAPIValidationFailureType::kUnsupportedOutputType,
1732 "NNAPI only supports int32 output.", &val_ctx);
1733 } else {
1734 auto builtin =
1735 reinterpret_cast<TfLiteArgMinParams*>(node->builtin_data);
1736 Expect(builtin->output_type == kTfLiteInt32,
1737 NNAPIValidationFailureType::kUnsupportedOutputType,
1738 "NNAPI only supports int32 output.", &val_ctx);
1739 }
1740 } break;
1741 case kTfLiteBuiltinMul: {
1742 ExpectMaxOpVersion(version, 2, &val_ctx);
1743 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
1744 ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
1745 if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
1746 Expect(reinterpret_cast<TfLiteMulParams*>(node->builtin_data)
1747 ->activation == kTfLiteActNone,
1748 NNAPIValidationFailureType::kNoActivationExpected,
1749 "No activation function supported", &val_ctx);
1750 }
1751 } else {
1752 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1753 }
1754 } break;
1755 case kTfLiteBuiltinAveragePool2d: {
1756 ExpectMaxOpVersion(version, 2, &val_ctx);
1757 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1758 auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
1759 // TODO(b/138756912): Large filter window would overflow on the
1760 // quantized reference CPU path.
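// For example, an 11x11 window (121 <= 256) is accepted even without an
// accelerator being specified, while a 17x17 window (289) is only
// delegated when an accelerator is explicitly specified.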
1761 if (IsQuantized(context->tensors[node->inputs->data[0]].type)) {
1762 Expect(is_accelerator_specified ||
1763 (builtin->filter_width * builtin->filter_height <= 256),
1764 NNAPIValidationFailureType::kUnsupportedOperandSize,
1765 "Large filter window would overflow on the reference CPU path",
1766 &val_ctx);
1767 }
1768 } break;
1769 case kTfLiteBuiltinMaxPool2d: {
1770 ExpectMaxOpVersion(version, 2, &val_ctx);
1771 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1772 } break;
1773 case kTfLiteBuiltinL2Pool2d: {
1774 ExpectOpVersion(version, 1, &val_ctx);
1775 ExpectIsFloatOperator(context, node, &val_ctx);
1776
1777 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1778 auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
1779 Expect(builtin->activation == kTfLiteActNone,
1780 NNAPIValidationFailureType::kUnsupportedOperandValue,
1781 "Before NNAPI 1.2 fused activation for l2_pool may not be "
1782 "supported.",
1783 &val_ctx);
1784 }
1785 } break;
1786 case kTfLiteBuiltinConv2d: {
1787 ExpectMaxOpVersion(version, 3, &val_ctx);
1788 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1789 Expect(!IsHybridOperator(context, builtin_code, node),
1790 NNAPIValidationFailureType::kUnsupportedHybridOperator,
1791 "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
1792 ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
1793
1794 const auto& filter_tensor = context->tensors[node->inputs->data[1]];
1795 if (filter_tensor.quantization.type == kTfLiteAffineQuantization) {
1796 TfLiteAffineQuantization* quantization_params =
1797 static_cast<TfLiteAffineQuantization*>(
1798 filter_tensor.quantization.params);
1799 Expect(quantization_params->scale->size <= 1,
1800 NNAPIValidationFailureType::kUnsupportedQuantizationType,
1801 "Per-channel quantized convolution not supported before NNAPI "
1802 "1.2.",
1803 &val_ctx);
1804 }
1805 }
1806 const auto input_type = context->tensors[node->inputs->data[0]].type;
1807 if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
1808 input_type == kTfLiteUInt8) {
1809 ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
1810 }
1811 auto builtin = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
1812 // TODO(b/132950584): Add support for Conv2D with omitted bias.
1813 Expect(node->inputs->size == 3,
1814 NNAPIValidationFailureType::kMissingRequiredOperand,
1815 "Conv2D with omitted bias not supported", &val_ctx);
1816 if (builtin->dilation_width_factor != 1 ||
1817 builtin->dilation_height_factor != 1) {
1818 Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
1819 NNAPIValidationFailureType::kUnsupportedOperandValue,
1820 "NNAPI supports dilated Conv2D since NNAPI 1.2.", &val_ctx);
1821 }
1822 } break;
1823 case kTfLiteBuiltinDepthwiseConv2d: {
1824 ExpectMaxOpVersion(version, 3, &val_ctx);
1825
1826 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1827 ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
1828
1829 const auto input_type = context->tensors[node->inputs->data[0]].type;
1830 if (input_type == kTfLiteUInt8) {
1831 ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
1832 }
1833
1834 auto builtin =
1835 reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
1836 Expect(builtin->dilation_width_factor == 1 &&
1837 builtin->dilation_height_factor == 1,
1838 NNAPIValidationFailureType::kUnsupportedOperandValue,
1839 "dilation_width_factor and dilation_height_factor expected to "
1840 "be equal to 1",
1841 &val_ctx);
1842 }
1843 } break;
1844 case kTfLiteBuiltinFullyConnected: {
1845 ExpectMaxOpVersion(version, 5, &val_ctx);
1846 // TODO(b/132950584): Add support for FullyConnected with no bias.
1847 Expect(node->inputs->size == 3 &&
1848 node->inputs->data[2] != kTfLiteOptionalTensor,
1849 NNAPIValidationFailureType::kMissingRequiredOperand,
1850 "FullyConnected with no bias not supported", &val_ctx);
1851 const auto output_type = context->tensors[node->outputs->data[0]].type;
1852 Expect(output_type != kTfLiteInt16,
1853 NNAPIValidationFailureType::kUnsupportedOutputType,
1854 "Unsupported output of type kTfLiteInt16", &val_ctx);
1855 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1856 Expect(!IsHybridOperator(context, builtin_code, node),
1857 NNAPIValidationFailureType::kUnsupportedHybridOperator,
1858 "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
1859 ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
1860 }
1861 const auto input_type = context->tensors[node->inputs->data[0]].type;
1862 if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
1863 input_type == kTfLiteUInt8) {
1864 ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
1865 }
1866 auto builtin =
1867 reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
1868 Expect(!builtin->keep_num_dims,
1869 NNAPIValidationFailureType::kUnsupportedOperandValue,
1870 "keep_num_dims == true not supported", &val_ctx);
1871 } break;
1872 case kTfLiteBuiltinHardSwish: {
1873 // Hardswish is supported; on pre-Q devices it is decomposed into basic
1874 // ops. For some NNAPI accelerators, the optimized TFLite kernels may
1875 // even be faster.
1876 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1877 } break;
1878 case kTfLiteBuiltinSoftmax: {
1879 ExpectOpVersion(version, 2, &val_ctx);
1880 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1881 const auto& output = context->tensors[node->outputs->data[0]];
1882 ExpectTypeIn(output.type, {kTfLiteFloat32, kTfLiteUInt8, kTfLiteInt8},
1883 NNAPIValidationFailureType::kUnsupportedOutputType,
1884 "Output type should be one of kTfLiteFloat32, kTfLiteUInt8, "
1885 "kTfLiteInt8.",
1886 &val_ctx);
1887 const auto& input = context->tensors[node->inputs->data[0]];
1888 const int input_rank = input.dims->size;
1889 Expect(input_rank <= 4,
1890 NNAPIValidationFailureType::kUnsupportedOperandRank,
1891 "Input rank should be <= 4", &val_ctx);
1892 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1893 Expect(
1894 input_rank == 2 || input_rank == 4,
1895 NNAPIValidationFailureType::kUnsupportedOperandRank,
1896 "Before API level 29 only 2D and 4D input tensors were supported.",
1897 &val_ctx);
1898 }
1899 } break;
1900 case kTfLiteBuiltinReshape: {
1901 ExpectOpVersion(version, 1, &val_ctx);
1902 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1903 if (node->inputs->size >= 2) {
1904 Expect(context->tensors[node->inputs->data[1]].allocation_type ==
1905 kTfLiteMmapRo,
1906 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
1907 "The shape input tensor must be constant.", &val_ctx);
1908 }
1909 if (node->inputs->size == 1) {
1910 // reject scalar reshaping
1911 auto* params =
1912 reinterpret_cast<TfLiteReshapeParams*>(node->builtin_data);
1913 int num_dimensions = params->num_dimensions;
1914 if (num_dimensions == 1 && params->shape[0] == 0) {
1915 // Legacy tflite models use a shape parameter of [0] to indicate
1916 // scalars.
1917 num_dimensions = 0;
1918 }
1919 Expect(num_dimensions > 0,
1920 NNAPIValidationFailureType::kUnsupportedOperandRank,
1921 "New shape rank should be > 0", &val_ctx);
1922 }
1923 } break;
1924 case kTfLiteBuiltinResizeBilinear: {
1925 ExpectMaxOpVersion(version, 3, &val_ctx);
1926 const auto& input = context->tensors[node->inputs->data[0]];
1927 const auto output_dims = context->tensors[node->outputs->data[0]].dims;
1928 Expect(input.dims->size == 4,
1929 NNAPIValidationFailureType::kUnsupportedOperandRank,
1930 "Input should have rank 4", &val_ctx);
1931 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1932 Expect(node->inputs->size >= 2,
1933 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
1934 "Expected at least 2 inputs", &val_ctx);
1935 if (node->inputs->size >= 2) {
1936 Expect(context->tensors[node->inputs->data[1]].allocation_type ==
1937 kTfLiteMmapRo,
1938 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
1939 "The size input tensor must be constant.", &val_ctx);
1940 }
1941 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1942 Expect(output_dims->data[1] == output_dims->data[2],
1943 NNAPIValidationFailureType::kUnsupportedOperandValue,
1944 "Require width == height due to driver differences in NNAPI "
1945 "< 1.2",
1946 &val_ctx);
1947 }
1948 auto builtin =
1949 reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
1950 if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
1951 Expect(!builtin->align_corners,
1952 NNAPIValidationFailureType::kUnsupportedOperandValue,
1953 "NNAPI does not support align_corners == true.", &val_ctx);
1954 Expect(!builtin->half_pixel_centers,
1955 NNAPIValidationFailureType::kUnsupportedOperandValue,
1956 "NNAPI does not support half_pixel_centers == true.", &val_ctx);
1957 }
1958 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1959 Expect(input.type == kTfLiteFloat32,
1960 NNAPIValidationFailureType::kUnsupportedInputType,
1961 "NNAPI 1.0 & 1.1 only supports float input.", &val_ctx);
1962 }
1963 } break;
1964 case kTfLiteBuiltinResizeNearestNeighbor: {
1965 ExpectMaxOpVersion(version, 3, &val_ctx);
1966 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
1967 &val_ctx);
1968 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1969 Expect(node->inputs->size >= 2,
1970 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
1971 "Expected at least 2 inputs", &val_ctx);
1972 if (node->inputs->size >= 2) {
1973 Expect(context->tensors[node->inputs->data[1]].allocation_type ==
1974 kTfLiteMmapRo,
1975 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
1976 "The size input tensor must be constant.", &val_ctx);
1977 }
1978 auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
1979 node->builtin_data);
1980 if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
1981 Expect(!builtin->align_corners,
1982 NNAPIValidationFailureType::kUnsupportedOperandValue,
1983 "NNAPI does not support align_corners == true.", &val_ctx);
1984 Expect(!builtin->half_pixel_centers,
1985 NNAPIValidationFailureType::kUnsupportedOperandValue,
1986 "NNAPI does not support half_pixel_centers == true.", &val_ctx);
1987 }
1988 } break;
1989 case kTfLiteBuiltinSqueeze: {
1990 ExpectOpVersion(version, 1, &val_ctx);
1991 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
1992 &val_ctx);
1993 auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
1994 if (android_sdk_version == kMinSdkVersionForNNAPI11) {
1995 Expect(builtin->num_squeeze_dims != 0,
1996 NNAPIValidationFailureType::kUnsupportedOperandValue,
1997 "NNAPI 1.1 does not support null squeeze_dims properly.",
1998 &val_ctx);
1999 }
2000 } break;
2001 case kTfLiteBuiltinUnidirectionalSequenceLstm: {
2002 ExpectMaxOpVersion(version, 2, &val_ctx);
2003 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2004 &val_ctx);
2005
2006 Expect(!IsHybridOperator(context, builtin_code, node),
2007 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2008 "Hybrid version of this op is not supported by NN API.", &val_ctx);
2009
2010 Expect(node->inputs->size == 20 || node->inputs->size == 24,
2011 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2012 "Supporting only operation with 20 or 24 inputs", &val_ctx);
2013 } break;
2014 case kTfLiteBuiltinL2Normalization: {
2015 ExpectMaxOpVersion(version, 2, &val_ctx);
2016
2017 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2018 ExpectIsFloatOperator(context, node, &val_ctx);
2019
2020 const auto& input = context->tensors[node->inputs->data[0]];
2021 Expect(input.dims->size == 4,
2022 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2023 "Expected 4 inputs", &val_ctx);
2024 }
2025 auto builtin = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
2026 Expect(builtin->activation == kTfLiteActNone,
2027 NNAPIValidationFailureType::kNoActivationExpected,
2028 "Expected no activation", &val_ctx);
2029 } break;
2030 case kTfLiteBuiltinLocalResponseNormalization: {
2031 ExpectOpVersion(version, 1, &val_ctx);
2032 } break;
2033 case kTfLiteBuiltinLshProjection: {
2034 ExpectOpVersion(version, 1, &val_ctx);
2035
2036 if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
2037 ->type == kTfLiteLshProjectionSparse) {
2038 // NNAPI does not support sparse projection correctly pre-Q
2039 // (b/111751836).
2040 Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2041 NNAPIValidationFailureType::kUnsupportedInputType,
2042 "NNAPI does not support sparse projection correctly pre-Q",
2043 &val_ctx);
2044 Expect(node->inputs->size == 2,
2045 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2046 " NNAPI does not support weights for sparse projects.",
2047 &val_ctx);
2048 }
2049 } break;
2050 case kTfLiteBuiltinConcatenation: {
2051 ExpectMaxOpVersion(version, 2, &val_ctx);
2052 Expect(reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
2053 ->activation == kTfLiteActNone,
2054 NNAPIValidationFailureType::kNoActivationExpected,
2055 "No activation function supported", &val_ctx);
2056 Expect(context->tensors[node->inputs->data[0]].dims->size <= 4,
2057 NNAPIValidationFailureType::kUnsupportedOperandRank,
2058 "Input rank should be less than 4", &val_ctx);
2059
2060 const auto& input_type = context->tensors[node->inputs->data[0]].type;
2061 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
2062 kTfLiteUInt8, kTfLiteInt8);
2063
2064 if (input_type == kTfLiteUInt8 &&
2065 android_sdk_version < kMinSdkVersionForNNAPI12) {
2066 auto first_param = context->tensors[node->inputs->data[0]].params;
2067 for (int i = 1; i < node->inputs->size; i++) {
2068 auto curr_param = context->tensors[node->inputs->data[i]].params;
2069 if (!Expect(curr_param.scale == first_param.scale &&
2070 curr_param.zero_point == first_param.zero_point,
2071 NNAPIValidationFailureType::kUnsupportedOperandValue,
2072 "NNAPI 1.0-1 only supported concatenating quantized "
2073 "tensor of the same scale and offset.",
2074 &val_ctx)) {
2075 break;
2076 }
2077 }
2078 }
2079 } break;
2080 case kTfLiteBuiltinDequantize: {
2081 Expect(version == 1 || version == 2,
2082 NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2083 "Supported op versions are 1 and 2 only", &val_ctx);
2084
2085 const auto& input = context->tensors[node->inputs->data[0]];
2086 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2087 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
2088 } else {
2089 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
2090
2091 if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
2092 input.type == kTfLiteInt8) {
2093 const auto zero_point = input.params.zero_point;
2094 Expect(zero_point == 0,
2095 NNAPIValidationFailureType::kUnsupportedInputType,
2096 "NN API supports int8 type since version 1.2 but only for "
2097 "symmetric quantization.",
2098 &val_ctx);
2099 }
2100 }
2101 } break;
2102 case kTfLiteBuiltinFloor: {
2103 ExpectOpVersion(version, 1, &val_ctx);
2104 } break;
2105 case kTfLiteBuiltinRelu:
2106 case kTfLiteBuiltinReluN1To1:
2107 case kTfLiteBuiltinRelu6:
2108 case kTfLiteBuiltinLogistic: {
2109 ExpectMaxOpVersion(version, 2, &val_ctx);
2110 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2111 } break;
2112 case kTfLiteBuiltinTanh: {
2113 ExpectMaxOpVersion(version, 2, &val_ctx);
2114 const TfLiteType input_type =
2115 context->tensors[node->inputs->data[0]].type;
2116 Expect(IsFloat(input_type) ||
2117 (IsQuantized(input_type) &&
2118 android_sdk_version >= kMinSdkVersionForNNAPI12),
2119 NNAPIValidationFailureType::kUnsupportedInputType,
2120 " NNAPI only support float tanh.", &val_ctx);
2121 } break;
2122 case kTfLiteBuiltinSub: {
2123 ExpectMaxOpVersion(version, 3, &val_ctx);
2124 const TfLiteType input_type =
2125 context->tensors[node->inputs->data[0]].type;
2126 Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 &&
2127 IsFloat(input_type)) ||
2128 (android_sdk_version >= kMinSdkVersionForNNAPI12 &&
2129 IsQuantized(input_type)) ||
2130 (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2131 IsInt32(input_type)),
2132 NNAPIValidationFailureType::kUnsupportedInputType,
2133 "NNAPI only support float sub.", &val_ctx);
2134 if (IsInt32(input_type)) {
2135 Expect(reinterpret_cast<TfLiteSubParams*>(node->builtin_data)
2136 ->activation == kTfLiteActNone,
2137 NNAPIValidationFailureType::kNoActivationExpected,
2138 "No activation function supported", &val_ctx);
2139 }
2140 const int input0_rank =
2141 context->tensors[node->inputs->data[0]].dims->size;
2142 const int input1_rank =
2143 context->tensors[node->inputs->data[1]].dims->size;
2144 Expect(input0_rank <= 4 && input1_rank <= 4,
2145 NNAPIValidationFailureType::kUnsupportedOperandRank,
2146 "Input rank must be <= 4", &val_ctx);
2147 } break;
2148 case kTfLiteBuiltinDiv: {
2149 ExpectOpVersion(version, 1, &val_ctx);
2150 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2151 &val_ctx);
2152 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2153 NNAPIValidationFailureType::kUnsupportedInputType,
2154 "NNAPI only support float div.", &val_ctx);
2155 } break;
2156 case kTfLiteBuiltinPad:
2157 case kTfLiteBuiltinPadv2: {
2158 ExpectMaxOpVersion(version, 2, &val_ctx);
2159 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2160 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2161 &val_ctx);
2162
2163 const TfLiteIntArrayView input_shape(
2164 context->tensors[node->inputs->data[0]].dims);
2165 Expect(!HasZeroes(input_shape),
2166 NNAPIValidationFailureType::kUnsupportedOperandValue,
2167 "NN API pad ops do not support input tensors with no elements",
2168 &val_ctx);
2169
2170 Expect(node->inputs->size >= 2,
2171 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2172 "Expecting at least 2 inputs", &val_ctx);
2173
2174 if (node->inputs->size == 3) {
2175 // This is going to be mapped with a PadV2
2176 Expect(
2177 android_sdk_version >= kMinSdkVersionForNNAPI12,
2178 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2179 "Specification of the padding value is supported from NNAPI 1.2.",
2180 &val_ctx);
2181 } else { // this is going to be mapped as Pad
2182 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2183 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2184 NNAPIValidationFailureType::kUnsupportedInputType,
2185 "Only Float32 inputs are supported before NNAPI 1.2",
2186 &val_ctx);
2187 }
2188 }
2189 } break;
2190 case kTfLiteBuiltinUnidirectionalSequenceRnn: {
2191 ExpectOpVersion(version, 1, &val_ctx);
2192 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2193 &val_ctx);
2194 Expect(!IsHybridOperator(context, builtin_code, node),
2195 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2196 "Hybrid version of this op is not supported by NN API.", &val_ctx);
2197 } break;
2198 case kTfLiteBuiltinSpaceToBatchNd: {
2199 ExpectMaxOpVersion(version, 2, &val_ctx);
2200 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2201 &val_ctx);
2202 } break;
2203 case kTfLiteBuiltinBatchToSpaceNd: {
2204 ExpectMaxOpVersion(version, 2, &val_ctx);
2205 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2206 &val_ctx);
2207 auto crops = context->tensors[node->inputs->data[2]];
2208 auto crops_data = crops.data.i32;
2209 Expect(crops_data && crops.bytes == 16 && crops_data[0] == 0 &&
2210 crops_data[1] == 0 && crops_data[2] == 0 && crops_data[3] == 0,
2211 NNAPIValidationFailureType::kUnsupportedOperandValue,
2212 "All crops should be 0.", &val_ctx);
2213 } break;
2214 case kTfLiteBuiltinStridedSlice: {
2215 ExpectMaxOpVersion(version, 2, &val_ctx);
2216 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2217 &val_ctx);
2218 } break;
2219 case kTfLiteBuiltinTranspose: {
2220 ExpectMaxOpVersion(version, 2, &val_ctx);
2221 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2222 &val_ctx);
2223 // Note that the permutation input tensor value dictates the output
2224 // dimensions.
2225 // TODO(b/110888333): Support dynamically-sized tensors in delegates.
2226 Expect((node->inputs->size > 1) &&
2227 (context->tensors[node->inputs->data[1]].allocation_type ==
2228 kTfLiteMmapRo),
2229 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2230 "Dynamically-sized tensors not supported.", &val_ctx);
2231 } break;
2232 case kTfLiteBuiltinAbs:
2233 case kTfLiteBuiltinExp:
2234 case kTfLiteBuiltinLog:
2235 case kTfLiteBuiltinRsqrt:
2236 case kTfLiteBuiltinPow: {
2237 ExpectOpVersion(version, 1, &val_ctx);
2238 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2239 &val_ctx);
2240 ExpectIsFloatOperator(context, node, &val_ctx);
2241 } break;
2242 case kTfLiteBuiltinSlice: {
2243 ExpectMaxOpVersion(version, 2, &val_ctx);
2244 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2245 &val_ctx);
2246 const auto input_type = context->tensors[node->inputs->data[0]].type;
2247 const auto begin_type = context->tensors[node->inputs->data[1]].type;
2248 const auto size_type = context->tensors[node->inputs->data[2]].type;
2249 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2250 kTfLiteUInt8, kTfLiteInt8);
2251 Expect(begin_type == kTfLiteInt32,
2252 NNAPIValidationFailureType::kUnsupportedInputType,
2253 "Begin type should be Int32", &val_ctx);
2254 Expect(size_type == kTfLiteInt32,
2255 NNAPIValidationFailureType::kUnsupportedInputType,
2256 "Size type should be Int32", &val_ctx);
2257 } break;
2258 case kTfLiteBuiltinSin: {
2259 ExpectOpVersion(version, 1, &val_ctx);
2260 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2261 &val_ctx);
2262 ExpectIsFloatOperator(context, node, &val_ctx);
2263 } break;
2264 case kTfLiteBuiltinTransposeConv: {
2265 ExpectMaxOpVersion(version, 2, &val_ctx);
2266 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2267 &val_ctx);
2268 Expect((node->inputs->size > 1) &&
2269 (context->tensors[node->inputs->data[0]].allocation_type ==
2270 kTfLiteMmapRo) &&
2271 (context->tensors[node->inputs->data[1]].allocation_type ==
2272 kTfLiteMmapRo),
2273 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2274 "Dynamically-sized tensors not supported.", &val_ctx);
2275 } break;
2276 case kTfLiteBuiltinSqrt: {
2277 ExpectOpVersion(version, 1, &val_ctx);
2278 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2279 &val_ctx);
2280 ExpectIsFloatOperator(context, node, &val_ctx);
2281 } break;
2282 case kTfLiteBuiltinRnn: {
2283 ExpectOpVersion(version, 1, &val_ctx);
2284 Expect(node->inputs->size == 5,
2285 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2286 "Expected 5 input", &val_ctx);
2287 if (node->inputs->size >= 2) {
2288 Expect(
2289 context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2290 kTfLiteFloat32,
2291 NNAPIValidationFailureType::kUnsupportedInputType,
2292 "NNAPI only support float32 weights.", &val_ctx);
2293 }
2294 } break;
2295 case kTfLiteBuiltinSpaceToDepth: {
2296 ExpectMaxOpVersion(version, 2, &val_ctx);
2297 const TfLiteType input_type =
2298 context->tensors[node->inputs->data[0]].type;
2299 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2300 kTfLiteInt8);
2301 } break;
2302 case kTfLiteBuiltinSvdf: {
2303 ExpectOpVersion(version, 1, &val_ctx);
2304 Expect(node->inputs->size == 5,
2305 NNAPIValidationFailureType::kUnsupportedOperandRank,
2306 "Expected input of rank 5", &val_ctx);
2307 if (node->inputs->size >= 2) {
2308 Expect(
2309 context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2310 kTfLiteFloat32,
2311 NNAPIValidationFailureType::kUnsupportedInputType,
2312 "NNAPI only support float32 weights.", &val_ctx);
2313 }
2314 Expect(android_sdk_version >= kMinSdkVersionForNNAPI11,
2315 NNAPIValidationFailureType::kUnsupportedOperandRank,
2316 "SVDF does not support rank > 1 on NNAPI 1.0.", &val_ctx);
2317 Expect(context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
2318 .type == kTfLiteFloat32,
2319 NNAPIValidationFailureType::kUnsupportedInputType,
2320 "Weights should be Float32", &val_ctx);
2321 } break;
2322 case kTfLiteBuiltinLstm: {
2323 ExpectMaxOpVersion(version, 3, &val_ctx);
2324 Expect(
2325 android_sdk_version >= kMinSdkVersionForNNAPI11,
2326 NNAPIValidationFailureType::kUnsupportedAndroidVersion,
2327 "NNAPI 1.0 has a bug for optional tensors which would affect LSTM.",
2328 &val_ctx);
2329 Expect(android_sdk_version >= kMinSdkVersionForNNAPI12 ||
2330 !IsHybridOperator(context, builtin_code, node),
2331 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2332 "Hybrid operators not supported before NNAPI 1.2.", &val_ctx);
2333
2334 const auto weight_input_index =
2335 isLstmBasicKernel(node) ? 2 /* basic::kInputWeights */
2336 : 4 /* full::kInputToOutputWeightsTensor */;
2337
2338 const TfLiteType weight_type =
2339 context->tensors[node->inputs->data[weight_input_index]].type;
2340
2341 if (isLstmBasicKernel(node)) {
2342 Expect(weight_type == kTfLiteUInt8,
2343 NNAPIValidationFailureType::kUnsupportedInputType,
2344 "Basic LSTM Kernels support only UINT8 weights", &val_ctx);
2345
2346 const auto input_quantization_params =
2347 context->tensors[node->inputs->data[0]].params;
2348 Expect(input_quantization_params.scale == 1. / 128. &&
2349 input_quantization_params.zero_point == 128,
2350 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2351 "Invalid input quantization", &val_ctx);
2352
2353 const auto output_quantization_params =
2354 context->tensors[node->outputs->data[0]].params;
2355 Expect(output_quantization_params.scale == 1. / 128. &&
2356 output_quantization_params.zero_point == 128,
2357 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2358 "Invalid output quantization", &val_ctx);
2359
2360 const auto cell_state_quantization_params =
2361 context->tensors[node->outputs->data[1]].params;
2362 Expect(cell_state_quantization_params.scale == 16. / 32768. &&
2363 cell_state_quantization_params.zero_point == 0,
2364 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2365 "Invalid cell state quantization", &val_ctx);
2366
2367 auto is_const_tensor = [&node, &context](int tensor_idx) {
2368 return context->tensors[node->inputs->data[tensor_idx]]
2369 .allocation_type == kTfLiteMmapRo;
2370 };
2371
2372 Expect(is_const_tensor(2 /* kInputWeights */),
2373 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2374 "Weights tensor should be constant", &val_ctx);
2375 Expect(is_const_tensor(3 /* kInputBiases */),
2376 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2377 "Biases tensor should be constant", &val_ctx);
2378
2379 return val_ctx.is_valid;
2380 } else {
2381 if (node->inputs->size == 24) {
2382 ExpectMinAndroidSdkVersion(android_sdk_version,
2383 kMinSdkVersionForNNAPI12, &val_ctx);
2384 }
2385
2386 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2387 Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8 ||
2388 weight_type == kTfLiteInt8,
2389 NNAPIValidationFailureType::kUnsupportedInputType,
2390 "Weight has to be Float32 or UINT8 or INT8", &val_ctx);
2391 } else {
2392 Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8,
2393 NNAPIValidationFailureType::kUnsupportedInputType,
2394 "Weight has to be Float32 or UINT8", &val_ctx);
2395 }
2396 }
2397 } break;
2398 case kTfLiteBuiltinMean: {
2399 ExpectMaxOpVersion(version, 2, &val_ctx);
2400 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2401 &val_ctx);
2402 if (android_sdk_version >= kMinSdkVersionForNNAPI12) {
2403 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 ||
2404 IsQuantized(context->tensors[node->inputs->data[0]].type),
2405 NNAPIValidationFailureType::kUnsupportedInputType,
2406 "Expected Float32 or Quantized input", &val_ctx);
2407 } else {
2408 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2409 NNAPIValidationFailureType::kUnsupportedInputType,
2410 "Expected Float32 input", &val_ctx);
2411 }
2412 Expect(context->tensors[node->outputs->data[0]].dims->size > 0,
2413 NNAPIValidationFailureType::kUnsupportedOutputType,
2414 "NNAPI does not support generating a scalar as output for MEAN.",
2415 &val_ctx);
2416
2417 auto input_param = context->tensors[node->inputs->data[0]].params;
2418 auto output_param = context->tensors[node->outputs->data[0]].params;
2419 Expect(input_param.scale == output_param.scale &&
2420 input_param.zero_point == output_param.zero_point,
2421 NNAPIValidationFailureType::kUnsupportedOutputType,
2422 "NNAPI requires that the input and output have the same "
2423 "quantization parameters.",
2424 &val_ctx);
2425 } break;
2426 case kTfLiteBuiltinEmbeddingLookup: {
2427 ExpectOpVersion(version, 1, &val_ctx);
2428 Expect(context->tensors[node->inputs->data[1]].type == kTfLiteFloat32,
2429 NNAPIValidationFailureType::kUnsupportedInputType,
2430 "NNAPI only support float32 values.", &val_ctx);
2431 } break;
2432 case kTfLiteBuiltinHashtableLookup: {
2433 ExpectOpVersion(version, 1, &val_ctx);
2434 Expect(context->tensors[node->outputs->data[0]].type == kTfLiteFloat32,
2435 NNAPIValidationFailureType::kUnsupportedOutputType,
2436 "NNAPI only support float32 output.", &val_ctx);
2437 } break;
2438 case kTfLiteBuiltinMaximum:
2439 case kTfLiteBuiltinMinimum: {
2440 ExpectMaxOpVersion(version, 3, &val_ctx);
2441 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2442 &val_ctx);
2443 const auto input_type = context->tensors[node->inputs->data[0]].type;
2444 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2445 kTfLiteInt8, kTfLiteInt32);
2446 const TfLiteTensor& operand0 = context->tensors[node->inputs->data[0]];
2447 if (operand0.dims->size == 0) {
2448 Expect(operand0.allocation_type == kTfLiteMmapRo,
2449 NNAPIValidationFailureType::kUnsupportedInputType,
2450 "Scalar operand should be constant", &val_ctx);
2451 }
2452 const TfLiteTensor& operand1 = context->tensors[node->inputs->data[1]];
2453 if (operand1.dims->size == 0) {
2454 Expect(operand1.allocation_type == kTfLiteMmapRo,
2455 NNAPIValidationFailureType::kUnsupportedInputType,
2456 "Scalar operand should be constant", &val_ctx);
2457 }
2458 } break;
2459 case kTfLiteBuiltinCast: {
2460 ExpectOpVersion(version, 1, &val_ctx);
2461 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2462 &val_ctx);
2463 const TfLiteType input_type =
2464 context->tensors[node->inputs->data[0]].type;
2465 const TfLiteType output_type =
2466 context->tensors[node->outputs->data[0]].type;
2467 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2468 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2469 kTfLiteUInt8, kTfLiteInt8);
2470
2471 ExpectTypeIn(
2472 output_type,
2473 {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
2474 NNAPIValidationFailureType::kUnsupportedOutputType,
2475 "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
2476 "kTfLiteUInt8, kTfLiteInt8.",
2477 &val_ctx);
2478 } else {
2479 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2480 kTfLiteUInt8);
2481
2482 ExpectTypeIn(
2483 output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
2484 NNAPIValidationFailureType::kUnsupportedOutputType,
2485 "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
2486 "kTfLiteUInt8.",
2487 &val_ctx);
2488 }
2489 } break;
2490 case kTfLiteBuiltinLeakyRelu:
2491 case kTfLiteBuiltinPrelu: {
2492 ExpectOpVersion(version, 1, &val_ctx);
2493 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2494 &val_ctx);
2495 const auto input_type = context->tensors[node->inputs->data[0]].type;
2496 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2497 kTfLiteInt8);
2498 } break;
2499 case kTfLiteBuiltinTile: {
2500 ExpectOpVersion(version, 1, &val_ctx);
2501 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2502 &val_ctx);
2503 const auto input_type = context->tensors[node->inputs->data[0]].type;
2504 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8,
2505 kTfLiteUInt8, kTfLiteInt32);
2506 const auto multipliers_type =
2507 context->tensors[node->inputs->data[1]].type;
2508 Expect(multipliers_type == kTfLiteInt32,
2509 NNAPIValidationFailureType::kUnsupportedInputType,
2510 "Multipliers should be Int32", &val_ctx);
2511 } break;
2512 case kTfLiteBuiltinLogicalOr:
2513 case kTfLiteBuiltinLogicalAnd:
2514 case kTfLiteBuiltinLogicalNot: {
2515 ExpectOpVersion(version, 1, &val_ctx);
2516 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2517 &val_ctx);
2518 const auto input_type = context->tensors[node->inputs->data[0]].type;
2519 Expect(input_type == kTfLiteBool,
2520 NNAPIValidationFailureType::kUnsupportedInputType,
2521 "Input should be bool", &val_ctx);
2522 } break;
2523 case kTfLiteBuiltinLess:
2524 case kTfLiteBuiltinLessEqual:
2525 case kTfLiteBuiltinGreater:
2526 case kTfLiteBuiltinGreaterEqual:
2527 case kTfLiteBuiltinEqual:
2528 case kTfLiteBuiltinNotEqual: {
2529 ExpectMaxOpVersion(version, 2, &val_ctx);
2530 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2531 &val_ctx);
2532 const auto input_type = context->tensors[node->inputs->data[0]].type;
2533 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2534 kTfLiteInt8, kTfLiteBool, kTfLiteInt32);
2535 } break;
2536 case kTfLiteBuiltinNeg: {
2537 ExpectMaxOpVersion(version, 2, &val_ctx);
2538 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2539 &val_ctx);
2540 const auto input_type = context->tensors[node->inputs->data[0]].type;
2541 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32);
2542 } break;
2543 case kTfLiteBuiltinTopkV2: {
2544 ExpectMaxOpVersion(version, 2, &val_ctx);
2545 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2546 &val_ctx);
2547 const auto& input_type = context->tensors[node->inputs->data[0]].type;
2548 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2549 kTfLiteUInt8, kTfLiteInt8);
2550 const auto& k_param = context->tensors[node->inputs->data[1]];
2551 Expect(k_param.type == kTfLiteInt32 &&
2552 k_param.allocation_type == kTfLiteMmapRo,
2553 NNAPIValidationFailureType::kUnsupportedInputType,
2554 "K param should be a constant of type Int32", &val_ctx);
2555 } break;
2556 case kTfLiteBuiltinSelect: {
2557 ExpectMaxOpVersion(version, 2, &val_ctx);
2558 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2559 &val_ctx);
2560 const auto value_type = context->tensors[node->inputs->data[1]].type;
2561 EXPECT_INPUT_TYPE_IN(value_type, kTfLiteFloat32, kTfLiteInt32,
2562 kTfLiteUInt8, kTfLiteInt8);
2563 TfLiteIntArray* condition_shape =
2564 context->tensors[node->inputs->data[0]].dims;
2565 TfLiteIntArray* input_shape =
2566 context->tensors[node->inputs->data[1]].dims;
2567 Expect(TfLiteIntArrayEqual(condition_shape, input_shape),
2568 NNAPIValidationFailureType::kUnsupportedOperandValue,
2569 "Condition and inputs tensors should have the same shape",
2570 &val_ctx);
2571 } break;
2572 case kTfLiteBuiltinGather: {
2573 ExpectOpVersion(version, 2, &val_ctx);
2574 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2575 &val_ctx);
2576 const auto input_type = context->tensors[node->inputs->data[0]].type;
2577 const auto& positions = context->tensors[node->inputs->data[1]];
2578
2579 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
2580 kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2581
2582 Expect(positions.type == kTfLiteInt32,
2583 NNAPIValidationFailureType::kUnsupportedInputType,
2584 "Positions type should be one of kTfLiteInt32", &val_ctx);
2585 Expect(positions.dims->size != 0,
2586 NNAPIValidationFailureType::kUnsupportedOperandRank,
2587 "0-dimension args are not supported by NNAPI.", &val_ctx);
2588 } break;
2589 case kTfLiteBuiltinBidirectionalSequenceLstm: {
2590 ExpectOpVersion(version, 1, &val_ctx);
2591 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2592 &val_ctx);
2593 Expect(!IsHybridOperator(context, builtin_code, node),
2594 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2595 "Hybrid version of this op is not supported by NN API.", &val_ctx);
2596 } break;
2597 case kTfLiteBuiltinExpandDims: {
2598 ExpectOpVersion(version, 1, &val_ctx);
2599 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2600 &val_ctx);
2601 const auto input_type = context->tensors[node->inputs->data[0]].type;
2602 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
2603 kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2604 const auto axis = context->tensors[node->inputs->data[1]];
2605 Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
2606 NNAPIValidationFailureType::kUnsupportedInputType,
2607 "NNAPI only supports constant int32 axis tensor.", &val_ctx);
2608 } break;
2609 case kTfLiteBuiltinSplit: {
2610 ExpectOpVersion(version, 3, &val_ctx);
2611 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2612 &val_ctx);
2613 // Tensor indices: split_dim: 0, value: 1
2614 const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
2615 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2616 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
2617 kTfLiteInt8, kTfLiteInt32);
2618 } else {
2619 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
2620 kTfLiteInt32);
2621 }
2622 const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
2623 Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
2624 NNAPIValidationFailureType::kUnsupportedInputType,
2625 "NNAPI only supports constant int32 axis tensor.", &val_ctx);
2626 } break;
2627 case kTfLiteBuiltinLogSoftmax: {
2628 ExpectOpVersion(version, 1, &val_ctx);
2629 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2630 &val_ctx);
2631 const auto input_type = context->tensors[node->inputs->data[0]].type;
2632 Expect(input_type == kTfLiteFloat32,
2633 NNAPIValidationFailureType::kUnsupportedInputType,
2634 "Input should be Float32.", &val_ctx);
2635 } break;
2636 case kTfLiteBuiltinQuantize: {
2637 ExpectMaxOpVersion(version, 2, &val_ctx);
2638 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2639 &val_ctx);
2640 const auto value_type = context->tensors[node->inputs->data[0]].type;
2641 Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
2642 NNAPIValidationFailureType::kUnsupportedInputType,
2643 "Value should be quantized or Float32.", &val_ctx);
2644 if (IsQuantized(value_type)) {
2645 const auto quantization_params =
2646 context->tensors[node->inputs->data[0]].params;
2647 Expect(quantization_params.scale > 0.f,
2648 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2649 "Quantization scale should be > 0.", &val_ctx);
2650 }
2651 const auto output_type = context->tensors[node->outputs->data[0]].type;
2652 if (android_sdk_version < kMinSdkVersionForNNAPI13) {
2653 Expect(output_type == kTfLiteUInt8,
2654 NNAPIValidationFailureType::kUnsupportedOutputType,
2655 "Output should be kTfLiteUInt8.", &val_ctx);
2656 } else {
2657 ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
2658 NNAPIValidationFailureType::kUnsupportedOutputType,
2659 "Output should be kTfLiteUInt8.", &val_ctx);
2660 }
2661 const auto quantization_params =
2662 context->tensors[node->outputs->data[0]].params;
2663 Expect(quantization_params.scale > 0.f,
2664 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2665 "Quantization scale should be > 0.", &val_ctx);
2666 } break;
2667 case kTfLiteBuiltinReduceAny: {
2668 ExpectOpVersion(version, 2, &val_ctx);
2669 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2670 &val_ctx);
2671 Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
2672 NNAPIValidationFailureType::kUnsupportedOutputType,
2673 "NNAPI does not support generating a scalar as output.", &val_ctx);
2674 } break;
2675 case kTfLiteBuiltinReduceMin:
2676 case kTfLiteBuiltinReduceMax: {
2677 ExpectMaxOpVersion(version, 2, &val_ctx);
2678 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2679 &val_ctx);
2680 const auto input_tensor = context->tensors[node->inputs->data[0]];
2681 const auto input_type = input_tensor.type;
2682 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2683 kTfLiteInt8);
2684 Expect(input_tensor.dims->size != 0,
2685 NNAPIValidationFailureType::kUnsupportedOutputType,
2686 "NNAPI does not support generating a scalar as output.", &val_ctx);
2687 } break;
2688 case kTfLiteBuiltinDepthToSpace: {
2689 const TfLiteType input_type =
2690 context->tensors[node->inputs->data[0]].type;
2691 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2692 kTfLiteInt8);
2693 } break;
2694 case kTfLiteBuiltinReduceProd:
2695 case kTfLiteBuiltinSum: {
2696 ExpectOpVersion(version, 1, &val_ctx);
2697 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2698 &val_ctx);
2699 Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
2700 NNAPIValidationFailureType::kUnsupportedOutputType,
2701 "NNAPI does not support generating a scalar as output", &val_ctx);
2702 const auto input_type = context->tensors[node->inputs->data[0]].type;
2703 Expect(input_type == kTfLiteFloat32,
2704 NNAPIValidationFailureType::kUnsupportedInputType,
2705 "NNAPI only supports floating point input.", &val_ctx);
2706 } break;
2707 case kTfLiteBuiltinElu: {
2708 ExpectOpVersion(version, 1, &val_ctx);
2709 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2710 &val_ctx);
2711 const auto input_type = context->tensors[node->inputs->data[0]].type;
2712 Expect(input_type == kTfLiteFloat32,
2713 NNAPIValidationFailureType::kUnsupportedInputType,
2714 "NNAPI only supports floating point input.", &val_ctx);
2715 } break;
2716 case kTfLiteBuiltinFill: {
2717 ExpectOpVersion(version, 1, &val_ctx);
2718 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2719 &val_ctx);
2720 const auto& dims_tensor = context->tensors[node->inputs->data[0]];
2721 Expect(IsConstantTensor(&dims_tensor),
2722 NNAPIValidationFailureType::kUnsupportedInputType,
2723              "NNAPI doesn't support a dynamic dimensions tensor.", &val_ctx);
2724 EXPECT_INPUT_TYPE_IN(dims_tensor.type, kTfLiteInt32, kTfLiteInt64);
2725 if (IsConstantTensor(&dims_tensor)) {
2726 Expect(dims_tensor.dims->data[0] != 0,
2727 NNAPIValidationFailureType::kUnsupportedOperandValue,
2728 "NNAPI doesn't support generating scalars from FILL", &val_ctx);
2729 if (dims_tensor.type == kTfLiteInt64) {
2730 bool fit_in_int32 =
2731 std::all_of(dims_tensor.data.i64,
2732 dims_tensor.data.i64 + dims_tensor.dims->data[0],
2733 [](int64_t dim) {
2734 return std::numeric_limits<int32_t>::min() <= dim &&
2735 dim <= std::numeric_limits<int32_t>::max();
2736 });
2737 Expect(fit_in_int32,
2738 NNAPIValidationFailureType::kUnsupportedOperandValue,
2739 "NNAPI only supports int32 dimensions tensor. If the "
2740 "dimensions type is int64 and they are constant we can "
2741 "convert them to int32 if the value isn't too large.",
2742 &val_ctx);
2743 }
2744 }
2745 const auto& value_tensor = context->tensors[node->inputs->data[1]];
2746 EXPECT_INPUT_TYPE_IN(value_tensor.type, kTfLiteFloat32, kTfLiteInt32,
2747 kTfLiteInt64);
2748 if (value_tensor.type == kTfLiteInt64) {
2749 Expect(
2750 IsConstantTensor(&value_tensor) &&
2751 *value_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
2752 *value_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
2753 NNAPIValidationFailureType::kUnsupportedInputType,
2754 "NNAPI only supports int32 input. If the input type is int64 and "
2755 "constant we can convert it to int32 if the value isn't too "
2756 "large.",
2757 &val_ctx);
2758 }
2759 } break;
2760 default:
2761 // All other operators are not mapped.
2762 AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator,
2763 "Unsupported operation type.", &val_ctx);
2764 }
2765 return val_ctx.is_valid;
2766 } // NOLINT(readability/fn_size)
2767
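// Maps a supported TFLite builtin operator to the corresponding NNAPI
// operation type. In addition to selecting *nn_op_type, this appends any
// extra scalar/vector operands that NNAPI expects beyond the tensor inputs
// (fused activation, strides, axes, etc.) via mapping_args.builder, and
// returns kTfLiteError for builtins that have no mapping.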
2768 TfLiteStatus NNAPIDelegateKernel::Map(
2769 TfLiteContext* context, int builtin_code, int version,
2770 int android_sdk_version, const NNAPIOpMappingArgs& mapping_args,
2771 ANeuralNetworksOperationType* nn_op_type) {
2772 switch (builtin_code) {
2773 case kTfLiteBuiltinAdd: {
2774 auto builtin =
2775 reinterpret_cast<TfLiteAddParams*>(mapping_args.node->builtin_data);
2776 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2777 *nn_op_type = ANEURALNETWORKS_ADD;
2778 } break;
2779 case kTfLiteBuiltinArgMax: {
2780 *nn_op_type = ANEURALNETWORKS_ARGMAX;
2781 } break;
2782 case kTfLiteBuiltinArgMin: {
2783 *nn_op_type = ANEURALNETWORKS_ARGMIN;
2784 } break;
2785 case kTfLiteBuiltinMul: {
2786 auto builtin =
2787 reinterpret_cast<TfLiteMulParams*>(mapping_args.node->builtin_data);
2788 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2789 *nn_op_type = ANEURALNETWORKS_MUL;
2790 } break;
2791 case kTfLiteBuiltinAveragePool2d: {
2792 mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
2793 *nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
2794 } break;
2795 case kTfLiteBuiltinMaxPool2d: {
2796 mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
2797 *nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
2798 } break;
2799 case kTfLiteBuiltinL2Pool2d: {
2800 mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
2801 *nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
2802 } break;
2803 case kTfLiteBuiltinConv2d: {
2804 auto builtin =
2805 reinterpret_cast<TfLiteConvParams*>(mapping_args.node->builtin_data);
2806 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
2807 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
2808 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
2809 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2810 // NNAPI supports dilated Conv2D since NNAPI 1.2.
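      // When dilation is present, the extended CONV_2D signature is used: it
      // additionally takes a data-layout flag (false selects NHWC) followed
      // by the two dilation factors, which is why these operands are only
      // appended in the dilated case.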
2811 if (builtin->dilation_width_factor != 1 ||
2812 builtin->dilation_height_factor != 1) {
2813 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
2814 mapping_args.builder->AddScalarInt32Operand(
2815 builtin->dilation_width_factor);
2816 mapping_args.builder->AddScalarInt32Operand(
2817 builtin->dilation_height_factor);
2818 }
2819 *nn_op_type = ANEURALNETWORKS_CONV_2D;
2820 } break;
2821 case kTfLiteBuiltinDepthwiseConv2d: {
2822 auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
2823 mapping_args.node->builtin_data);
2824 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
2825 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
2826 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
2827 mapping_args.builder->AddScalarInt32Operand(builtin->depth_multiplier);
2828 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2829 if (builtin->dilation_width_factor != 1 ||
2830 builtin->dilation_height_factor != 1) {
2831 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format.
2832 mapping_args.builder->AddScalarInt32Operand(
2833 builtin->dilation_width_factor);
2834 mapping_args.builder->AddScalarInt32Operand(
2835 builtin->dilation_height_factor);
2836 }
2837 *nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
2838 } break;
2839 case kTfLiteBuiltinFullyConnected: {
2840 auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
2841 mapping_args.node->builtin_data);
2842 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2843 *nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
2844 } break;
2845 case kTfLiteBuiltinHardSwish: {
2846 *nn_op_type = ANEURALNETWORKS_HARD_SWISH;
2847 } break;
2848 case kTfLiteBuiltinSoftmax: {
2849 auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(
2850 mapping_args.node->builtin_data);
2851 mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
2852       // The optional scalar specifying the axis the softmax is computed
2853       // over is not added; it defaults to -1.
2854 *nn_op_type = ANEURALNETWORKS_SOFTMAX;
2855 } break;
2856 case kTfLiteBuiltinReshape: {
2857 if (mapping_args.node->inputs->size == 1) {
2858 // if no new_shape tensor, construct the new shape from params.
2859 auto* params = reinterpret_cast<TfLiteReshapeParams*>(
2860 mapping_args.node->builtin_data);
2861 int num_dimensions = params->num_dimensions;
2862 std::vector<int32_t> output_shape(num_dimensions);
2863 for (int i = 0; i < num_dimensions; ++i) {
2864 output_shape[i] = params->shape[i];
2865 }
2866 mapping_args.builder->AddVectorInt32Operand(
2867 output_shape.data(), static_cast<uint32_t>(num_dimensions));
2868 }
2869 *nn_op_type = ANEURALNETWORKS_RESHAPE;
2870 } break;
2871 case kTfLiteBuiltinResizeBilinear: {
2872 const int output_id = mapping_args.node->outputs->data[0];
2873 auto& output = mapping_args.context->tensors[output_id];
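      // The output tensor is laid out as NHWC, so dims[1] is the new height
      // and dims[2] the new width; NNAPI takes them as scalar operands,
      // width first.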
2874 const int output_height = output.dims->data[1];
2875 const int output_width = output.dims->data[2];
2876 mapping_args.builder->AddScalarInt32Operand(output_width);
2877 mapping_args.builder->AddScalarInt32Operand(output_height);
2878 auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(
2879 mapping_args.node->builtin_data);
2880 if (builtin->align_corners == true ||
2881 builtin->half_pixel_centers == true) {
2882 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
2883 mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
2884 mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
2885 }
2886 *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
2887 } break;
2888 case kTfLiteBuiltinResizeNearestNeighbor: {
2889 const TfLiteTensor& new_shape =
2890 mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
2891 // NNAPI uses scalar inputs for height and width.
2892 mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]);
2893 mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]);
2894 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
2895 auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
2896 mapping_args.node->builtin_data);
2897 if (builtin->align_corners == true ||
2898 builtin->half_pixel_centers == true) {
2899 mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
2900 mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
2901 }
2902 *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
2903 } break;
2904 case kTfLiteBuiltinSqueeze: {
2905 auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
2906 mapping_args.node->builtin_data);
2907 // Note that we add the squeeze dimensions even if the dimensions
2908 // were unspecified (empty), as NNAPI requires the operand.
2909 mapping_args.builder->AddVectorInt32Operand(
2910 builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr,
2911 static_cast<uint32_t>(builtin->num_squeeze_dims));
2912 *nn_op_type = ANEURALNETWORKS_SQUEEZE;
2913 } break;
2914 case kTfLiteBuiltinUnidirectionalSequenceLstm: {
2915 auto builtin = reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
2916 mapping_args.node->builtin_data);
2917 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
2918 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
2919 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
2920 mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
2921 const bool hybrid_op = IsHybridOperator(
2922 mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm,
2923 mapping_args.node);
2924 if (mapping_args.node->inputs->size == 24) {
2925 // Add layer normalization tensors if they are provided.
2926 for (int i = 20; i < 24; ++i) {
2927 const int input_index = mapping_args.node->inputs->data[i];
2928 if (input_index != kTfLiteOptionalTensor) {
2929 mapping_args.builder->AddTensorInput(input_index, hybrid_op);
2930 } else {
2931 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
2932 }
2933 }
2934 } else {
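        // No layer normalization tensors were provided; NNAPI still expects
        // the four layer-norm operands, so add empty optional operands.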
2935 for (int i = 0; i < 4; ++i) {
2936 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
2937 }
2938 }
2939
2940 *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM;
2941 } break;
2942 case kTfLiteBuiltinL2Normalization: {
2943 *nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
2944 } break;
2945 case kTfLiteBuiltinLocalResponseNormalization: {
2946 auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
2947 mapping_args.node->builtin_data);
2948 mapping_args.builder->AddScalarInt32Operand(builtin->radius);
2949 mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
2950 mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
2951 mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
2952 *nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
2953 } break;
2954 case kTfLiteBuiltinLshProjection: {
2955 auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
2956 mapping_args.node->builtin_data);
2957 int type = builtin->type;
2958 // In Android Q+, NNAPI uses 3 to denote
2959 // kTfLiteLshProjectionSparse.
2960 const int kNNAPILshProjectionSparse = 3;
2961 if (builtin->type == kTfLiteLshProjectionSparse) {
2962 type = kNNAPILshProjectionSparse;
2963 // Add NNAPI null weight operand.
2964 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
2965 }
2966 mapping_args.builder->AddScalarInt32Operand(type);
2967 *nn_op_type = ANEURALNETWORKS_LSH_PROJECTION;
2968 } break;
2969 case kTfLiteBuiltinConcatenation: {
2970 auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
2971 mapping_args.node->builtin_data);
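      // TFLite allows a negative concatenation axis, so normalize it against
      // the rank of the first input before passing it to NNAPI.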
2972 int axis = builtin->axis < 0
2973 ? mapping_args.context
2974 ->tensors[mapping_args.node->inputs->data[0]]
2975 .dims->size +
2976 builtin->axis
2977 : builtin->axis;
2978 mapping_args.builder->AddScalarInt32Operand(axis);
2979 *nn_op_type = ANEURALNETWORKS_CONCATENATION;
2980 } break;
2981 case kTfLiteBuiltinDequantize: {
2982 *nn_op_type = ANEURALNETWORKS_DEQUANTIZE;
2983 } break;
2984 case kTfLiteBuiltinFloor: {
2985 *nn_op_type = ANEURALNETWORKS_FLOOR;
2986 } break;
2987 case kTfLiteBuiltinRelu: {
2988 *nn_op_type = ANEURALNETWORKS_RELU;
2989 } break;
2990 case kTfLiteBuiltinReluN1To1: {
2991 *nn_op_type = ANEURALNETWORKS_RELU1;
2992 } break;
2993 case kTfLiteBuiltinRelu6: {
2994 *nn_op_type = ANEURALNETWORKS_RELU6;
2995 } break;
2996 case kTfLiteBuiltinLogistic: {
2997 *nn_op_type = ANEURALNETWORKS_LOGISTIC;
2998 } break;
2999 case kTfLiteBuiltinTanh: {
3000 *nn_op_type = ANEURALNETWORKS_TANH;
3001 } break;
3002 case kTfLiteBuiltinSub: {
3003 auto builtin =
3004 reinterpret_cast<TfLiteSubParams*>(mapping_args.node->builtin_data);
3005 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3006 *nn_op_type = ANEURALNETWORKS_SUB;
3007 } break;
3008 case kTfLiteBuiltinDiv: {
3009 auto builtin =
3010 reinterpret_cast<TfLiteDivParams*>(mapping_args.node->builtin_data);
3011 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3012 *nn_op_type = ANEURALNETWORKS_DIV;
3013 } break;
3014 case kTfLiteBuiltinPad:
3015 case kTfLiteBuiltinPadv2: {
3016       // We prefer to map to PAD, since it is more widely supported. We
3017       // map to PAD_V2 only when there is a need to specify the padding
3018       // value.
3019 if (mapping_args.node->inputs->size == 2) {
3020 *nn_op_type = ANEURALNETWORKS_PAD;
3021 } else {
3022 const int constant_value_id = mapping_args.node->inputs->data[2];
3023 if (constant_value_id == kTfLiteOptionalTensor) {
3024 *nn_op_type = ANEURALNETWORKS_PAD;
3025 } else {
3026 *nn_op_type = ANEURALNETWORKS_PAD_V2;
3027 }
3028 }
3029 } break;
3030 case kTfLiteBuiltinUnidirectionalSequenceRnn: {
3031 auto builtin = reinterpret_cast<TfLiteSequenceRNNParams*>(
3032 mapping_args.node->builtin_data);
3033 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3034 mapping_args.builder->AddScalarInt32Operand(builtin->time_major);
3035 *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN;
3036 } break;
3037 case kTfLiteBuiltinSpaceToBatchNd: {
3038 *nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND;
3039 } break;
3040 case kTfLiteBuiltinBatchToSpaceNd: {
3041 *nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND;
3042 } break;
3043 case kTfLiteBuiltinStridedSlice: {
3044 auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
3045 mapping_args.node->builtin_data);
3046 mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask);
3047 mapping_args.builder->AddScalarInt32Operand(builtin->end_mask);
3048 mapping_args.builder->AddScalarInt32Operand(builtin->shrink_axis_mask);
3049 *nn_op_type = ANEURALNETWORKS_STRIDED_SLICE;
3050 } break;
3051 case kTfLiteBuiltinTranspose: {
3052 *nn_op_type = ANEURALNETWORKS_TRANSPOSE;
3053 } break;
3054 case kTfLiteBuiltinAbs: {
3055 *nn_op_type = ANEURALNETWORKS_ABS;
3056 } break;
3057 case kTfLiteBuiltinExp: {
3058 *nn_op_type = ANEURALNETWORKS_EXP;
3059 } break;
3060 case kTfLiteBuiltinLog: {
3061 *nn_op_type = ANEURALNETWORKS_LOG;
3062 } break;
3063 case kTfLiteBuiltinRsqrt: {
3064 *nn_op_type = ANEURALNETWORKS_RSQRT;
3065 } break;
3066 case kTfLiteBuiltinPow: {
3067 *nn_op_type = ANEURALNETWORKS_POW;
3068 } break;
3069 case kTfLiteBuiltinSlice: {
3070 *nn_op_type = ANEURALNETWORKS_SLICE;
3071 } break;
3072 case kTfLiteBuiltinSin: {
3073 *nn_op_type = ANEURALNETWORKS_SIN;
3074 } break;
3075 case kTfLiteBuiltinTransposeConv: {
3076 int input_tensor_flags = 0;
3077 const int input_tensor_id =
3078 mapping_args.node->inputs->data[/*kDataInputTensor*/ 2];
3079 const int weight_tensor_id =
3080 mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3081
3082       // Transpose convolution doesn't have a hybrid variant.
3083 const bool hybrid_op = false;
3084
3085 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3086 mapping_args.builder->AddTensorInput(
3087 input_tensor_id, hybrid_op,
3088 input_tensor_flags | NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED);
3089
3090 } else {
3091 mapping_args.builder->AddTensorInput(
3092 input_tensor_id, hybrid_op,
3093 input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION);
3094 }
3095       // Transpose convolution uses per-channel quantization with int8 inputs
3096 // even if the number of channels in quantization parameters is equal to 1
3097 // (as opposed to conv2d, which uses per-tensor quantization in this
3098 // case).
3099 mapping_args.builder->AddTensorInput(
3100 weight_tensor_id, hybrid_op,
3101 input_tensor_flags | NN_TENSOR_FLAG_FORCE_PER_CHANNEL);
3102
3103       // NNAPI requires a bias tensor, so we allocate a new tensor and fill
3104       // it with zeroes. It is deleted with the other tensors in the context
3105       // during the subgraph destructor call.
3106 int bias_index = -1;
3107 mapping_args.context->AddTensors(mapping_args.context, 1, &bias_index);
3108 TfLiteTensor* bias_tensor = &mapping_args.context->tensors[bias_index];
3109 const auto input_type =
3110 mapping_args.context
3111 ->tensors[mapping_args.node->inputs->data[/*kDataInputTensor*/ 2]]
3112 .type;
3113 if (input_type == kTfLiteFloat32) {
3114 bias_tensor->type = kTfLiteFloat32;
3115 } else {
3116 bias_tensor->type = kTfLiteInt32;
3117 }
3118
3119 // Create an array with a required bias shape and resize the bias
3120 // tensor.
3121 TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1);
3122 const TfLiteTensor& output_shape =
3123 mapping_args.context->tensors[mapping_args.node->inputs
3124 ->data[/*kOutputShapeTensor*/ 0]];
3125 const int output_depth = output_shape.data.i32[3];
3126 bias_shape->data[0] = output_depth;
3127 bias_tensor->allocation_type = kTfLiteDynamic;
3128 mapping_args.context->ResizeTensor(mapping_args.context, bias_tensor,
3129 bias_shape);
3130
3131 // Set tensor's values to zeroes and add it using AddVector*, so
3132 // that the values are copied to NNAPI. We don't use the AddTensor
3133 // function because it doesn't copy values and the tensor we just
3134 // created is not in the node->inputs.
3135 if (input_type == kTfLiteFloat32) {
3136 memset(bias_tensor->data.f, 0, output_depth * sizeof(float));
3137 mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f,
3138 output_depth);
3139 } else {
3140 memset(bias_tensor->data.i32, 0, output_depth * sizeof(int));
3141 const TfLiteTensor& input_tensor =
3142 mapping_args.context->tensors[mapping_args.node->inputs
3143 ->data[/*kDataInputTensor*/ 2]];
3144 const TfLiteTensor& filter_tensor =
3145 mapping_args.context->tensors[mapping_args.node->inputs
3146 ->data[/*kWeightsTensor*/ 1]];
3147 // NNAPI requires bias scale to be a product of an input scale and
3148 // a filter scale.
3149 bias_tensor->params.scale =
3150 input_tensor.params.scale * filter_tensor.params.scale;
3151 mapping_args.builder->AddVectorInt32Operand(
3152 bias_tensor->data.i32, output_depth,
3153 input_tensor.params.scale * filter_tensor.params.scale,
3154 /*zero_point=*/0);
3155 }
3156
3157 mapping_args.builder->AddTensorInput(
3158 mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0], hybrid_op);
3159
3160 auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(
3161 mapping_args.node->builtin_data);
3162 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3163 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3164 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3165 mapping_args.builder->AddScalarInt32Operand(
3166 /*ANEURALNETWORKS_FUSED_NONE*/ 0);
3167 // Use NHWC layout for input and output.
3168 mapping_args.builder->AddScalarBoolOperand(false);
3169 *nn_op_type = ANEURALNETWORKS_TRANSPOSE_CONV;
3170 } break;
3171 case kTfLiteBuiltinSqrt: {
3172 *nn_op_type = ANEURALNETWORKS_SQRT;
3173 } break;
3174 case kTfLiteBuiltinRnn: {
3175       // NNAPI needs both state_in and state_out.
3176 int ann_index;
3177 mapping_args.builder->AddStateFloat32Tensor(
3178 mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
3179 &ann_index);
3180 mapping_args.model_state_outputs->push_back(ann_index);
3181 mapping_args.model_state_tfl_inputs->push_back(
3182 mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
3183 auto builtin =
3184 reinterpret_cast<TfLiteRNNParams*>(mapping_args.node->builtin_data);
3185 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3186 *nn_op_type = ANEURALNETWORKS_RNN;
3187 } break;
3188 case kTfLiteBuiltinSpaceToDepth: {
3189 auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(
3190 mapping_args.node->builtin_data);
3191 mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
3192 *nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
3193 } break;
3194 case kTfLiteBuiltinSvdf: {
3195       // NNAPI needs both state_in and state_out.
3196 int ann_index;
3197 mapping_args.builder->AddStateFloat32Tensor(
3198 mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4],
3199 &ann_index);
3200 mapping_args.model_state_outputs->push_back(ann_index);
3201 mapping_args.model_state_tfl_inputs->push_back(
3202 mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4]);
3203
3204 auto builtin =
3205 reinterpret_cast<TfLiteSVDFParams*>(mapping_args.node->builtin_data);
3206 mapping_args.builder->AddScalarInt32Operand(builtin->rank);
3207 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3208 *nn_op_type = ANEURALNETWORKS_SVDF;
3209 } break;
3210 case kTfLiteBuiltinLstm: {
3211 if (isLstmBasicKernel(mapping_args.node)) {
3212 const auto output_dims =
3213 mapping_args.context->tensors[mapping_args.node->outputs->data[1]]
3214 .dims;
3215
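        // The TFLite quantized basic LSTM packs all gate weights into a
        // single kInputWeights tensor and all gate biases into kInputBiases.
        // NNAPI's QUANTIZED_16BIT_LSTM expects 8 separate weight operands and
        // 4 separate bias operands, so the combined tensors are decomposed
        // below.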
3216 // Inputs kInputData
3217 mapping_args.builder->AddTensorInput(
3218 mapping_args.node->inputs->data[0 /* kInputData */],
3219 /* hybrid_op */ false,
3220 /* scalar_as_tensor */ false);
3221
3222         // The 8 weight tensors are set by decomposing the
3223         // kInputWeights param.
3224 const auto weight_tensor =
3225 mapping_args.context->tensors[mapping_args.node->inputs
3226 ->data[2 /* kInputWeights */]];
3227
3228 std::vector<uint8_t> recurrent_to_input;
3229 std::vector<uint8_t> input_to_input;
3230 std::vector<uint8_t> recurrent_to_cell;
3231 std::vector<uint8_t> input_to_cell;
3232 std::vector<uint8_t> recurrent_to_forget;
3233 std::vector<uint8_t> input_to_forget;
3234 std::vector<uint8_t> recurrent_to_output;
3235 std::vector<uint8_t> input_to_output;
3236 tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor(
3237 weight_tensor.data.uint8, weight_tensor.dims, &recurrent_to_input,
3238 &input_to_input, &recurrent_to_cell, &input_to_cell,
3239 &recurrent_to_forget, &input_to_forget, &recurrent_to_output,
3240 &input_to_output);
3241
3242 TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2);
3243 TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2);
3244 tflite::delegate::nnapi::SetWeightSubmatrixDims(
3245 weight_tensor.dims, recurrent_weight_dims, input_weight_dims);
3246
3247 int new_tensor_index = -1;
3248
3249 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3250 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3251 input_weight_dims, input_to_input, weight_tensor.params,
3252 &new_tensor_index);
3253
3254 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3255 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3256 input_weight_dims, input_to_forget, weight_tensor.params,
3257 &new_tensor_index);
3258
3259 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3260 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3261 input_weight_dims, input_to_cell, weight_tensor.params,
3262 &new_tensor_index);
3263
3264 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3265 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3266 input_weight_dims, input_to_output, weight_tensor.params,
3267 &new_tensor_index);
3268
3269 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3270 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3271 recurrent_weight_dims, recurrent_to_input, weight_tensor.params,
3272 &new_tensor_index);
3273
3274 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3275 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3276 recurrent_weight_dims, recurrent_to_forget, weight_tensor.params,
3277 &new_tensor_index);
3278
3279 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3280 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3281 recurrent_weight_dims, recurrent_to_cell, weight_tensor.params,
3282 &new_tensor_index);
3283
3284 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3285 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3286 recurrent_weight_dims, recurrent_to_output, weight_tensor.params,
3287 &new_tensor_index);
3288
3289 TfLiteIntArrayFree(input_weight_dims);
3290 TfLiteIntArrayFree(recurrent_weight_dims);
3291
3292 // Biases have to be split in four.
3293 const auto bias_size = output_dims->data[1];
3294 const TfLiteTensor& biases_tensor =
3295 mapping_args.context->tensors[mapping_args.node->inputs
3296 ->data[3 /* kInputBiases */]];
3297
3298 std::vector<int32_t> input_bias;
3299 std::vector<int32_t> cell_bias;
3300 std::vector<int32_t> forget_bias;
3301 std::vector<int32_t> output_bias;
3302 delegate::nnapi::DecomposeBiasTensor(biases_tensor.data.i32, bias_size,
3303 &input_bias, &cell_bias,
3304 &forget_bias, &output_bias);
3305
3306 int input_bias_tensor = -1;
3307 mapping_args.builder->AddNewInputConstantTensor<int32_t>(
3308 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, input_bias,
3309 biases_tensor.params, &input_bias_tensor);
3310 int forget_bias_tensor = -1;
3311 mapping_args.builder->AddNewInputConstantTensor(
3312 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
3313 forget_bias, biases_tensor.params, &forget_bias_tensor);
3314 int cell_gate_bias_tensor = -1;
3315 mapping_args.builder->AddNewInputConstantTensor(
3316 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, cell_bias,
3317 biases_tensor.params, &cell_gate_bias_tensor);
3318 int output_gate_bias_tensor = -1;
3319 mapping_args.builder->AddNewInputConstantTensor(
3320 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
3321 output_bias, biases_tensor.params, &output_gate_bias_tensor);
3322
3323 mapping_args.builder->AddTensorInput(
3324 mapping_args.node->inputs->data[4 /* kInputPrevState */],
3325 /* hybrid_op */ false,
3326 /* scalar_as_tensor */ false);
3327
3328 // kInputPrevActivation
3329 mapping_args.builder->AddTensorInput(
3330 mapping_args.node->inputs->data[1 /* kInputPrevActivation */],
3331 /* hybrid_op */ false,
3332 /* scalar_as_tensor */ false);
3333
3334         // Configure the copy from the activation and state outputs
3335         // to their associated inputs.
3336 mapping_args.feedback_loops->push_back(std::make_tuple(
3337 mapping_args.node->outputs->data[0 /*kOutputActivation*/],
3338 mapping_args.node->inputs->data[1 /*kInputPrevActivation*/]));
3339
3340 mapping_args.feedback_loops->push_back(std::make_tuple(
3341 mapping_args.node->outputs->data[1 /*kOutputState*/],
3342 mapping_args.node->inputs->data[4 /*kInputPrevState*/]));
3343
3344 // OUTPUTS
3345 // Setting only the first two since the remaining ones are
3346 // ignored by NNAPI
3347 mapping_args.builder->AddTensorOutput(
3348 mapping_args.node->outputs->data[1 /* kOutputState */], 0);
3349
3350 mapping_args.builder->AddTensorOutput(
3351 mapping_args.node->outputs->data[0 /* kOutputActivation */], 0);
3352
3353 *nn_op_type = ANEURALNETWORKS_QUANTIZED_16BIT_LSTM;
3354 } else {
3355 auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
3356 mapping_args.node->builtin_data);
3357 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3358 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3359 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3360
3361 // Current NNAPI implementation requires the scratch_buffer as
3362 // output.
3363 mapping_args.builder->AddAdditionalFloat32OutputTensor(2);
3364
3365         // NNAPI needs both state_in and state_out for cell_state and
3366         // output_state.
3367 int ann_index;
3368 mapping_args.builder->AddStateFloat32Tensor(
3369 mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 18],
3370 &ann_index);
3371 mapping_args.model_state_outputs->push_back(ann_index);
3372 mapping_args.model_state_tfl_inputs->push_back(
3373 mapping_args.node->inputs
3374 ->data[/*kInputActivationStateTensor*/ 18]);
3375 mapping_args.builder->AddStateFloat32Tensor(
3376 mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
3377 &ann_index);
3378 mapping_args.model_state_outputs->push_back(ann_index);
3379 mapping_args.model_state_tfl_inputs->push_back(
3380 mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);
3381
3382 const bool hybrid_op = IsHybridOperator(
3383 mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node);
3384
3385 if (mapping_args.node->inputs->size == 24) {
3386 for (int i = 20; i < 24; ++i) {
3387 const auto input_index = mapping_args.node->inputs->data[i];
3388 if (input_index != kTfLiteOptionalTensor) {
3389 mapping_args.builder->AddTensorInput(input_index, hybrid_op);
3390 } else {
3391 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3392 }
3393 }
3394 }
3395
3396 *nn_op_type = ANEURALNETWORKS_LSTM;
3397 }
3398 } break;
3399 case kTfLiteBuiltinMean: {
3400 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3401 mapping_args.node->builtin_data);
3402 int32_t keep_dims = 0;
3403 if (builtin->keep_dims) keep_dims = 1;
3404 mapping_args.builder->AddScalarInt32Operand(keep_dims);
3405 *nn_op_type = ANEURALNETWORKS_MEAN;
3406 } break;
3407 case kTfLiteBuiltinEmbeddingLookup: {
3408 *nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
3409 } break;
3410 case kTfLiteBuiltinHashtableLookup: {
3411 *nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
3412 } break;
3413 case kTfLiteBuiltinMaximum: {
3414 *nn_op_type = ANEURALNETWORKS_MAXIMUM;
3415 } break;
3416 case kTfLiteBuiltinMinimum: {
3417 *nn_op_type = ANEURALNETWORKS_MINIMUM;
3418 } break;
3419 case kTfLiteBuiltinCast: {
3420 *nn_op_type = ANEURALNETWORKS_CAST;
3421 } break;
3422 case kTfLiteBuiltinLeakyRelu: {
3423 const auto input_type =
3424 mapping_args.context->tensors[mapping_args.node->inputs->data[0]]
3425 .type;
3426 auto builtin = reinterpret_cast<TfLiteLeakyReluParams*>(
3427 mapping_args.node->builtin_data);
3428
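      // LeakyRelu is lowered to PRELU with a single-element constant alpha
      // tensor. For quantized inputs the alpha value is encoded in the
      // operand's quantization scale and the stored element is 1.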
3429 TfLiteTensor alpha_tensor;
3430 alpha_tensor.type = input_type;
3431 alpha_tensor.allocation_type = kTfLiteDynamic;
3432 alpha_tensor.dims = TfLiteIntArrayCreate(1);
3433 alpha_tensor.dims->data[0] = 1;
3434 alpha_tensor.params.zero_point = 0;
3435
3436 int new_tensor_index = -1;
3437 if (input_type == kTfLiteFloat32) {
3438 alpha_tensor.params.scale = 0;
3439 std::vector<float> alpha_value = {builtin->alpha};
3440 mapping_args.builder->AddNewInputConstantTensor(
3441 ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, alpha_tensor.dims,
3442 alpha_value, alpha_tensor.params, &new_tensor_index);
3443 } else if (input_type == kTfLiteInt8 &&
3444 android_sdk_version >= kMinSdkVersionForNNAPI13) {
3445 alpha_tensor.params.scale = builtin->alpha;
3446 std::vector<int8_t> alpha_value = {1};
3447 mapping_args.builder->AddNewInputConstantTensor(
3448 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
3449 alpha_tensor.dims, alpha_value, alpha_tensor.params,
3450 &new_tensor_index);
3451 } else {
3452 alpha_tensor.params.scale = builtin->alpha;
3453 std::vector<uint8_t> alpha_value = {1};
3454 mapping_args.builder->AddNewInputConstantTensor(
3455 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3456 alpha_tensor.dims, alpha_value, alpha_tensor.params,
3457 &new_tensor_index);
3458 }
3459
3460 *nn_op_type = ANEURALNETWORKS_PRELU;
3461 } break;
3462 case kTfLiteBuiltinPrelu: {
3463 *nn_op_type = ANEURALNETWORKS_PRELU;
3464 } break;
3465 case kTfLiteBuiltinTile: {
3466 *nn_op_type = ANEURALNETWORKS_TILE;
3467 } break;
3468 case kTfLiteBuiltinLogicalOr: {
3469 *nn_op_type = ANEURALNETWORKS_LOGICAL_OR;
3470 } break;
3471 case kTfLiteBuiltinLogicalAnd: {
3472 *nn_op_type = ANEURALNETWORKS_LOGICAL_AND;
3473 } break;
3474 case kTfLiteBuiltinLogicalNot: {
3475 *nn_op_type = ANEURALNETWORKS_LOGICAL_NOT;
3476 } break;
3477 case kTfLiteBuiltinLess: {
3478 *nn_op_type = ANEURALNETWORKS_LESS;
3479 } break;
3480 case kTfLiteBuiltinLessEqual: {
3481 *nn_op_type = ANEURALNETWORKS_LESS_EQUAL;
3482 } break;
3483 case kTfLiteBuiltinGreater: {
3484 *nn_op_type = ANEURALNETWORKS_GREATER;
3485 } break;
3486 case kTfLiteBuiltinGreaterEqual: {
3487 *nn_op_type = ANEURALNETWORKS_GREATER_EQUAL;
3488 } break;
3489 case kTfLiteBuiltinEqual: {
3490 *nn_op_type = ANEURALNETWORKS_EQUAL;
3491 } break;
3492 case kTfLiteBuiltinNotEqual: {
3493 *nn_op_type = ANEURALNETWORKS_NOT_EQUAL;
3494 } break;
3495 case kTfLiteBuiltinNeg: {
3496 *nn_op_type = ANEURALNETWORKS_NEG;
3497 } break;
3498 case kTfLiteBuiltinTopkV2: {
3499 const TfLiteTensor& k_param =
3500 mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3501 mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32);
3502 *nn_op_type = ANEURALNETWORKS_TOPK_V2;
3503 } break;
3504 case kTfLiteBuiltinSelect: {
3505 *nn_op_type = ANEURALNETWORKS_SELECT;
3506 } break;
3507 case kTfLiteBuiltinGather: {
3508 auto builtin = reinterpret_cast<TfLiteGatherParams*>(
3509 mapping_args.node->builtin_data);
3510 mapping_args.builder->AddScalarInt32Operand(builtin->axis);
3511 mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
3512 /* hybrid_op */ false,
3513 /* tensor_flags */ 0);
3514 *nn_op_type = ANEURALNETWORKS_GATHER;
3515 } break;
3516 case kTfLiteBuiltinBidirectionalSequenceLstm: {
3517 auto builtin = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
3518 mapping_args.node->builtin_data);
3519 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3520 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3521 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3522 mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs);
3523 mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
3524 // TF Lite doesn't support layer normalization in bidirectional
3525 // sequence LSTM, so we insert optional tensors for NNAPI.
3526 for (int i = 0; i < 8; ++i) {
3527 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3528 }
3529 *nn_op_type = ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM;
3530 } break;
3531 case kTfLiteBuiltinExpandDims: {
3532 const TfLiteTensor& axis_param =
3533 mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3534 mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32);
3535 *nn_op_type = ANEURALNETWORKS_EXPAND_DIMS;
3536 } break;
3537 case kTfLiteBuiltinSplit: {
3538 const TfLiteTensor& axis =
3539 mapping_args.context->tensors[mapping_args.node->inputs->data[0]];
3540 auto builtin =
3541 reinterpret_cast<TfLiteSplitParams*>(mapping_args.node->builtin_data);
3542 mapping_args.builder->AddScalarInt32Operand(*axis.data.i32);
3543 mapping_args.builder->AddScalarInt32Operand(builtin->num_splits);
3544 *nn_op_type = ANEURALNETWORKS_SPLIT;
3545 } break;
3546 case kTfLiteBuiltinLogSoftmax: {
3547       // Scaling and axis are hardcoded in TFLite to 1 and -1,
3548       // respectively.
3549 mapping_args.builder->AddScalarFloat32Operand(1);
3550 mapping_args.builder->AddScalarInt32Operand(-1);
3551 *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
3552 } break;
3553 case kTfLiteBuiltinQuantize: {
3554 auto input_index = mapping_args.node->inputs->data[0];
3555       // NNAPI only supports quantization from float, not requantization.
3556       // If the input is already quantized, dequantize it by adding a
3557       // Dequantize node before this one.
3558 if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
3559 mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
3560 mapping_args.node_index);
3561 }
3562
3563 *nn_op_type = ANEURALNETWORKS_QUANTIZE;
3564 } break;
3565 case kTfLiteBuiltinReduceAny: {
3566 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3567 mapping_args.node->builtin_data);
3568 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3569 *nn_op_type = ANEURALNETWORKS_REDUCE_ANY;
3570 } break;
3571 case kTfLiteBuiltinReduceMin: {
3572 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3573 mapping_args.node->builtin_data);
3574 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3575 *nn_op_type = ANEURALNETWORKS_REDUCE_MIN;
3576 } break;
3577 case kTfLiteBuiltinReduceMax: {
3578 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3579 mapping_args.node->builtin_data);
3580 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3581 *nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
3582 } break;
3583 case kTfLiteBuiltinDepthToSpace: {
3584 auto builtin = reinterpret_cast<TfLiteDepthToSpaceParams*>(
3585 mapping_args.node->builtin_data);
3586 mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
3587 *nn_op_type = ANEURALNETWORKS_DEPTH_TO_SPACE;
3588 } break;
3589 case kTfLiteBuiltinReduceProd: {
3590 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3591 mapping_args.node->builtin_data);
3592 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3593 *nn_op_type = ANEURALNETWORKS_REDUCE_PROD;
3594 } break;
3595 case kTfLiteBuiltinSum: {
3596 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3597 mapping_args.node->builtin_data);
3598 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3599 *nn_op_type = ANEURALNETWORKS_REDUCE_SUM;
3600 } break;
3601 case kTfLiteBuiltinElu: {
3602 mapping_args.builder->AddScalarFloat32Operand(1.0);
3603 *nn_op_type = ANEURALNETWORKS_ELU;
3604 } break;
3605 case kTfLiteBuiltinFill: {
3606 *nn_op_type = ANEURALNETWORKS_FILL;
3607 } break;
3608 default:
3609 // All other operators are not mapped.
3610 return kTfLiteError;
3611 }
3612 return kTfLiteOk;
3613 }
3614
3615 // Initialize the kernel (a NN model).
3616 TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
3617 const TfLiteDelegateParams* params,
3618 int* nnapi_errno) {
3619 for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
3620 nodes_.push_back(node_index);
3621 }
3622
3623 const auto delegate_options =
3624 StatefulNnApiDelegate::GetOptions(params->delegate);
3625 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
3626 ShouldUseTargetDevices(delegate_options, nnapi_)) {
3627 TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
3628 nnapi_errno, &nnapi_devices_));
3629
3630 if (nnapi_devices_.empty()) {
3631 context->ReportError(
3632 context, "NNAPI delegate requested but no accelerators available.");
3633 return kTfLiteError;
3634 }
3635 }
3636
3637 // Mark the handle backed tensors.
3638 tensor_memory_map_ =
3639 &StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate);
3640
3641 if (!nn_model_) {
3642 ANeuralNetworksModel* model = nullptr;
3643 RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3644 nnapi_->ANeuralNetworksModel_create(&model),
3645 "creating NNAPI model", nnapi_errno);
3646 nn_model_.reset(model);
3647
3648 TF_LITE_ENSURE_STATUS(BuildGraph(context, delegate_options,
3649 params->input_tensors,
3650 params->output_tensors, nnapi_errno));
3651 }
3652
3653   // Calculate the model compilation cache token here, since its value
3654   // depends on some of the TfLiteDelegateParams.
3655 nn_compilation_cache_token_.clear();
3656 const char* cache_dir = delegate_options.cache_dir;
3657 const char* model_token = delegate_options.model_token;
3658 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
3659 model_token) {
3660     // Compilation caching could be enabled; try to construct the uint8
3661     // token.
3662 // TODO(b/133342794): use a generic token generator class.
3663 uint64_t token_parts[4];
3664 // Create bits from model_token.
3665 // Using farmhash fingerprint instead of std::hash, as the latter is not
3666 // guaranteed to be stable across program invocations.
3667 token_parts[0] =
3668 farmhash::Fingerprint64(model_token, std::strlen(model_token));
3669 // Create bits from params->nodes_to_replace.
3670 token_parts[1] = GetHash(params->nodes_to_replace);
3671 // Create bits from params->input_tensors. These include the input tensor
3672 // sizes, as the cached compilations are size-dependent.
3673 token_parts[2] = GetHash(params->input_tensors);
3674 for (int i : TfLiteIntArrayView(params->input_tensors)) {
3675 if (i != kTfLiteOptionalTensor) {
3676 TfLiteTensor* t = &context->tensors[i];
3677 TF_LITE_ENSURE(context, t->dims);
3678 token_parts[2] = GetHash(t->dims, token_parts[2]);
3679 }
3680 }
3681     // Create bits from params->output_tensors.
3682 token_parts[3] = GetHash(params->output_tensors);
3683     // NNAPI requires the token to be 256 bits long.
3684 // TODO(b/172238515): get token size from header instead of
3685 // hardcoding.
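    // Resulting 32-byte token layout: bytes [0,8) model_token fingerprint,
    // [8,16) nodes_to_replace hash, [16,24) input tensors (and dims) hash,
    // [24,32) output tensors hash, copied in the host's byte order.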
3686 std::vector<uint8_t> nnapi_cache_token(32, 0);
3687 // Copy the token bits.
3688 uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
3689 for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
3690 nnapi_cache_token[i] = p[i];
3691 }
3692
3693 nn_compilation_cache_token_ = nnapi_cache_token;
3694 }
3695
3696 initialised_ = true;
3697
3698 return kTfLiteOk;
3699 }
3700
3701 TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
3702 TfLiteNode* node, int* nnapi_errno) {
3703 if (!initialised_) {
3704 return kTfLiteError;
3705 }
3706
3707 const auto delegate_options =
3708 StatefulNnApiDelegate::GetOptions(node->delegate);
3709 if (nn_compilation_) {
3710 return kTfLiteOk;
3711 }
3712
3713 ANeuralNetworksCompilation* compilation = nullptr;
3714 if (!nnapi_devices_.empty()) {
3715 // Compile for the selected accelerator.
3716 RETURN_TFLITE_ERROR_IF_NN_ERROR(
3717 context,
3718 nnapi_->ANeuralNetworksCompilation_createForDevices(
3719 nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
3720 &compilation),
3721 "creating NNAPI model for given devices", nnapi_errno);
3722 } else {
3723 RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3724 nnapi_->ANeuralNetworksCompilation_create(
3725 nn_model_.get(), &compilation),
3726 "creating NNAPI compilation", nnapi_errno);
3727 }
3728
3729 auto preference = delegate_options.execution_preference;
3730 if (preference !=
3731 StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
3732 const int preference_result =
3733 nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
3734 preference);
3735 if (preference_result != ANEURALNETWORKS_NO_ERROR) {
3736 nnapi_->ANeuralNetworksCompilation_free(compilation);
3737 compilation = nullptr;
3738 }
3739 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
3740 "setting compilation preferences",
3741 nnapi_errno);
3742 }
3743
3744 if (!nn_compilation_cache_token_.empty()) {
3745 const char* cache_dir = delegate_options.cache_dir;
3746 const int set_caching_result =
3747 nnapi_->ANeuralNetworksCompilation_setCaching(
3748 compilation, cache_dir, nn_compilation_cache_token_.data());
3749 if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
3750 nnapi_->ANeuralNetworksCompilation_free(compilation);
3751 compilation = nullptr;
3752 }
3753 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
3754 "configuring NNAPI caching", nnapi_errno);
3755 }
3756 // Set compilation timeout if applicable.
3757 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
3758 if (delegate_options.max_compilation_timeout_duration_ns > 0) {
3759 RETURN_TFLITE_ERROR_IF_NN_ERROR(
3760 context,
3761 nnapi_->ANeuralNetworksCompilation_setTimeout(
3762 compilation,
3763 delegate_options.max_compilation_timeout_duration_ns),
3764 "setting compilation timeout", nnapi_errno);
3765 }
3766 RETURN_TFLITE_ERROR_IF_NN_ERROR(
3767 context,
3768 nnapi_->ANeuralNetworksCompilation_setPriority(
3769 compilation, delegate_options.execution_priority),
3770 "setting compilation priority", nnapi_errno);
3771 }
3772 const int finish_result =
3773 nnapi_->ANeuralNetworksCompilation_finish(compilation);
3774 if (finish_result != ANEURALNETWORKS_NO_ERROR) {
3775 nnapi_->ANeuralNetworksCompilation_free(compilation);
3776 compilation = nullptr;
3777 }
3778 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
3779 "completing NNAPI compilation", nnapi_errno);
3780 nn_compilation_.reset(compilation);
3781
3782 return kTfLiteOk;
3783 }
3784
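// Queries the target NNAPI devices for per-operation support of the built
// model and maps the result back to TFLite node indices: a TFLite node is
// reported as supported only if every NNAPI operation it was lowered to is
// supported by the devices.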
3785 TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices(
3786 TfLiteContext* context, std::vector<int>* supported_nodes,
3787 int* nnapi_errno) {
3788 if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) {
3789 return kTfLiteError;
3790 }
3791
3792 const auto nnapi_model_size = nnapi_to_tflite_op_mapping_.size();
3793
3794   // Determine the list of operations the target devices actually support.
3795 std::unique_ptr<bool[]> nnapi_ops_support_flags(new bool[nnapi_model_size]);
3796
3797 RETURN_TFLITE_ERROR_IF_NN_ERROR(
3798 context,
3799 nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices(
3800 nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
3801 nnapi_ops_support_flags.get()),
3802 "Checking supported operations for devices", nnapi_errno);
3803
3804 // A TfLite op is supported only if all the associated NNAPI ones are.
3805 auto tflite_ops_support_status = std::map<int, bool>();
3806 std::for_each(nodes_.begin(), nodes_.end(),
3807 [&tflite_ops_support_status](int tflite_node_index) {
3808 tflite_ops_support_status[tflite_node_index] = true;
3809 });
3810 for (int nnapi_op_index = 0; nnapi_op_index < nnapi_model_size;
3811 nnapi_op_index++) {
3812 const auto tflite_op_index = nnapi_to_tflite_op_mapping_[nnapi_op_index];
3813 tflite_ops_support_status[tflite_op_index] &=
3814 nnapi_ops_support_flags[nnapi_op_index];
3815 }
3816
3817 supported_nodes->clear();
3818 std::for_each(nodes_.begin(), nodes_.end(),
3819 [&supported_nodes, &tflite_ops_support_status](int node_index) {
3820 if (tflite_ops_support_status[node_index]) {
3821 supported_nodes->push_back(node_index);
3822 }
3823 });
3824
3825 return kTfLiteOk;
3826 }
3827
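// Runs one inference through NNAPI: creates an execution, applies execution
// timeouts where available, resizes the shared input/output memory pools if
// dynamic dimensions are allowed, binds input and output buffers, runs the
// computation (asynchronously before NNAPI 1.2, synchronously afterwards),
// copies results back into the TFLite tensors and propagates feedback loops.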
3828 TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
3829 TfLiteNode* node, int* nnapi_errno) {
3830 ANeuralNetworksExecution* execution = nullptr;
3831 RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3832 nnapi_->ANeuralNetworksExecution_create(
3833 nn_compilation_.get(), &execution),
3834 "creating NNAPI execution", nnapi_errno);
3835 std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
3836 execution_unique_ptr(execution, NNFreeExecution(nnapi_));
3837
3838   // Set execution timeout and loop timeout if applicable.
3839 const auto delegate_options =
3840 StatefulNnApiDelegate::GetOptions(node->delegate);
3841 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
3842 if (delegate_options.max_execution_timeout_duration_ns > 0) {
3843 RETURN_TFLITE_ERROR_IF_NN_ERROR(
3844 context,
3845 nnapi_->ANeuralNetworksExecution_setTimeout(
3846 execution, delegate_options.max_execution_timeout_duration_ns),
3847 "setting execution timeout", nnapi_errno);
3848 }
3849 if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
3850 RETURN_TFLITE_ERROR_IF_NN_ERROR(
3851 context,
3852 nnapi_->ANeuralNetworksExecution_setLoopTimeout(
3853 execution,
3854 delegate_options.max_execution_loop_timeout_duration_ns),
3855 "setting execution loop timeout", nnapi_errno);
3856 }
3857 }
3858   // Check whether the input and output memory pools need to be resized.
3859 if (delegate_options.allow_dynamic_dimensions) {
3860 size_t total_input_byte_size = 0;
3861     // Compute the total byte size required by the NNAPI-mapped input tensors.
3862 for (int i : TfLiteIntArrayView(node->inputs)) {
3863 // Constant tensors are not NNAPI inputs.
3864 if (i != kTfLiteOptionalTensor &&
3865 context->tensors[i].allocation_type != kTfLiteMmapRo &&
3866 // The delegate might not have mapped this input (this can
3867 // happen if one tensor is split in several ones)
3868 operand_mapping_.lite_index_to_ann(i) != -1) {
3869 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
3870 continue;
3871 }
3872 const TfLiteType nn_type_conversion =
3873 operand_mapping_.lite_index_to_ann_type_conversion(i);
3874 int tensor_size = 0;
3875 if (nn_type_conversion == kTfLiteNoType) {
3876 tensor_size = context->tensors[i].bytes;
3877 } else {
3878 size_t type_size;
3879 TF_LITE_ENSURE_OK(
3880 context, GetSizeOfType(context, nn_type_conversion, &type_size));
3881 tensor_size = NumElements(&context->tensors[i]) * type_size;
3882 }
3883 total_input_byte_size += tensor_size;
3884 total_input_byte_size += getNumPaddingBytes(tensor_size);
3885 }
3886 }
3887 if (total_input_byte_size > nn_input_memory_->get_byte_size()) {
3888 nn_input_memory_.reset(
3889 new NNMemory(nnapi_, "input_pool", total_input_byte_size));
3890 }
3891
3892 size_t total_output_byte_size = 0;
3893 for (int i : TfLiteIntArrayView(node->outputs)) {
3894 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
3895 continue;
3896 }
3897 total_output_byte_size += context->tensors[i].bytes;
3898 total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
3899 }
3900 if (total_output_byte_size > nn_output_memory_->get_byte_size()) {
3901 nn_output_memory_.reset(
3902 new NNMemory(nnapi_, "output_pool", total_output_byte_size));
3903 }
3904 }
3905
3906   // Set the input tensor buffers. Note: we access TFLite tensors using
3907   // absolute indices, but NNAPI indexes inputs by relative indices.
3908 int relative_input_index = 0;
3909
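  // From NNAPI 1.3 onwards int8 data can be passed as ASYMM_SIGNED without a
  // zero-point shift; on older versions the conversion loops below shift int8
  // values by 128 when converting them to uint8 or int32.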
3910 const bool use_int8_asymm_signed =
3911 target_sdk_version_ >= kMinSdkVersionForNNAPI13;
3912
3913 size_t input_offset = 0;
3914 for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
3915 if (absolute_input_index == kTfLiteOptionalTensor) {
3916 continue;
3917 }
3918 ANeuralNetworksOperandType input_nn_operand_type;
3919 ANeuralNetworksOperandType* input_nn_operand_type_ptr = nullptr;
3920 TfLiteTensor* tensor = &context->tensors[absolute_input_index];
3921 TfLiteType ann_type_equivalent =
3922 operand_mapping_.lite_index_to_ann_type_conversion(
3923 absolute_input_index);
3924 if (delegate_options.allow_dynamic_dimensions &&
3925 HasUnspecifiedDimension(tensor)) {
3926 input_nn_operand_type =
3927 ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
3928 input_nn_operand_type_ptr = &input_nn_operand_type;
3929 }
3930 if (tensor->allocation_type != kTfLiteMmapRo) {
3931 if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
3932 tensor->buffer_handle < tensor_memory_map_->size()) {
3933 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
3934 context,
3935 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
3936 execution, relative_input_index, input_nn_operand_type_ptr,
3937 tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
3938 tensor->bytes),
3939 "associating NNAPI execution input with a memory object", tensor,
3940 nnapi_errno);
3941 relative_input_index++;
3942 continue;
3943 }
3944 int tensor_size = 0;
3945 if (ann_type_equivalent != kTfLiteNoType) {
3946 const auto num_elements = NumElements(tensor);
3947 uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
3948 if (tensor->type == kTfLiteUInt8 &&
3949 ann_type_equivalent == kTfLiteInt32) {
3950 for (int i = 0; i < num_elements; ++i) {
3951 reinterpret_cast<int32_t*>(input_ptr)[i] =
3952 static_cast<const int32_t>(tensor->data.uint8[i]);
3953 }
3954 } else if (tensor->type == kTfLiteInt8 &&
3955 ann_type_equivalent == kTfLiteUInt8) {
3956 // Explicitly convert int8 values to uint8 values.
3957 for (int i = 0; i < num_elements; ++i) {
3958 input_ptr[i] = static_cast<const uint8_t>(
3959 static_cast<int32_t>(tensor->data.int8[i]) + 128);
3960 }
3961 } else if (tensor->type == kTfLiteInt8 &&
3962 ann_type_equivalent == kTfLiteInt32) {
3963 if (use_int8_asymm_signed) {
3964 for (int i = 0; i < num_elements; ++i) {
3965 reinterpret_cast<int32_t*>(input_ptr)[i] =
3966 static_cast<const int32_t>(tensor->data.int8[i]);
3967 }
3968 } else {
3969 for (int i = 0; i < num_elements; ++i) {
3970 reinterpret_cast<int32_t*>(input_ptr)[i] =
3971 static_cast<const int32_t>(tensor->data.int8[i]) + 128;
3972 }
3973 }
3974 } else {
3975 context->ReportError(
3976 context,
3977 "NN API Delegate: unsupported tensor types conversion: "
3978 "from type code %d to type code %d.\n",
3979 tensor->type, ann_type_equivalent);
3980 return kTfLiteError;
3981 }
3982 size_t type_size;
3983 TF_LITE_ENSURE_OK(
3984 context, GetSizeOfType(context, ann_type_equivalent, &type_size));
3985 tensor_size = NumElements(tensor) * type_size;
3986 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
3987 context,
3988 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
3989 execution, relative_input_index, input_nn_operand_type_ptr,
3990 nn_input_memory_->get_handle(), input_offset, tensor_size),
3991 "associating NNAPI execution input with a memory object", tensor,
3992 nnapi_errno);
3993 } else {
3994 // copy data to pre-allocated shared memory.
3995 memcpy(nn_input_memory_->get_data_ptr() + input_offset,
3996 tensor->data.raw, tensor->bytes);
3997 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
3998 context,
3999 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4000 execution, relative_input_index, input_nn_operand_type_ptr,
4001 nn_input_memory_->get_handle(), input_offset, tensor->bytes),
4002 "associating NNAPI execution input with a memory object", tensor,
4003 nnapi_errno);
4004 tensor_size = tensor->bytes;
4005 }
4006 input_offset += tensor_size;
4007 input_offset += getNumPaddingBytes(tensor_size);
4008 relative_input_index++;
4009 }
4010 }
4011
4012 // Set the output tensor buffers.
4013 int relative_output_index = 0;
4014 size_t output_offset = 0;
4015 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
4016     // If the NNAPI implementation doesn't have some of the outputs, they
4017     // are left unmapped and we should not try to read their values here.
4018 if (operand_mapping_.lite_index_to_ann(output_index) == -1) {
4019 continue;
4020 }
4021 ANeuralNetworksOperandType output_nn_operand_type;
4022 ANeuralNetworksOperandType* output_nn_operand_type_ptr = nullptr;
4023 TfLiteTensor* tensor = &context->tensors[output_index];
4024 if (delegate_options.allow_dynamic_dimensions &&
4025 HasUnspecifiedDimension(tensor)) {
4026 TfLiteType ann_type_equivalent =
4027 operand_mapping_.lite_index_to_ann_type_conversion(output_index);
4028 output_nn_operand_type =
4029 ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
4030 output_nn_operand_type_ptr = &output_nn_operand_type;
4031 }
4032 if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
4033 tensor->buffer_handle < tensor_memory_map_->size()) {
4034 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4035 context,
4036 nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4037 execution, relative_output_index, output_nn_operand_type_ptr,
4038 tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
4039 tensor->bytes),
4040 "associating NNAPI execution output to a memory object", tensor,
4041 nnapi_errno);
4042
4043 } else {
4044 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4045 context,
4046 nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4047 execution, relative_output_index, output_nn_operand_type_ptr,
4048 nn_output_memory_->get_handle(), output_offset, tensor->bytes),
4049 "associating NNAPI execution output to a memory object", tensor,
4050 nnapi_errno);
4051 output_offset += tensor->bytes;
4052 output_offset += getNumPaddingBytes(tensor->bytes);
4053 }
4054 relative_output_index++;
4055 }
4056
4057   // The state_out of the previous invocation needs to be mapped to the
4058   // state_in of the current invocation.
4059 for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
4060 int state_tensor_idx = model_state_tfl_inputs_[i];
4061 TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
4062     // Here we are using a deep copy for state_in tensors so that we are not
4063     // reading and writing into the same buffer during an invocation.
4064     // TODO(b/110369471): use a double shared buffer to minimize the copies.
4065 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4066 context,
4067 nnapi_->ANeuralNetworksExecution_setOutput(
4068 execution, relative_output_index, nullptr, tensor->data.raw,
4069 tensor->bytes),
4070 "associating NNAPI execution output to a buffer", nnapi_errno);
4071 relative_output_index++;
4072 }
4073 // Invoke ANN in blocking fashion.
4074 if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
4075 ANeuralNetworksEvent* event = nullptr;
4076 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4077 context,
4078 nnapi_->ANeuralNetworksExecution_startCompute(execution, &event),
4079 "starting async computation", nnapi_errno);
4080 const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
4081 nnapi_->ANeuralNetworksEvent_free(event);
4082 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result,
4083 "waiting for async computation completion",
4084 nnapi_errno);
4085 } else {
4086 // Use synchronous execution for NNAPI 1.2+.
4087 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4088 context, nnapi_->ANeuralNetworksExecution_compute(execution),
4089 "running computation", nnapi_errno);
4090 }
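// The synchronous path above avoids creating and waiting on an
// ANeuralNetworksEvent for every invocation; the event-based path is kept
// only for devices that predate NNAPI 1.2.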
4091
4092 // copy results from shared memory to the destination.
4093 output_offset = 0;
4094 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
4095 TfLiteTensor* tensor = &context->tensors[output_index];
4096 if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
4097 continue;
4098 }
4099 TfLiteType ann_type_equivalent =
4100 operand_mapping_.lite_index_to_ann_type_conversion(output_index);
4101 if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
4102 // Explicitly convert uint8 values to int8 values.
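// For example, a raw output byte of 200 (uint8) becomes 200 - 128 = 72;
// reinterpreted as int8 by the memcpy below, that is the value TFLite
// expects, since the int8 zero point is shifted by -128 relative to uint8.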
4103 uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
4104 nn_output_memory_->get_data_ptr() + output_offset);
4105 const auto num_elements = NumElements(tensor);
4106 for (int i = 0; i < num_elements; ++i) {
4107 output_ptr[i] =
4108 static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
4109 }
4110 }
4111 memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
4112 tensor->bytes);
4113 output_offset += tensor->bytes;
4114 output_offset += getNumPaddingBytes(tensor->bytes);
4115 }
4116
4117 // Copy the content of each output tensor in feedback_loops_ into its
4118 // associated input tensor.
4119 for (auto feedback_loop : feedback_loops_) {
4120 int output_tensor_idx;
4121 int input_tensor_idx;
4122 std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
4123 TfLiteTensor& src = context->tensors[output_tensor_idx];
4124 TfLiteTensor& dest = context->tensors[input_tensor_idx];
4125
4126 memcpy(dest.data.raw, src.data.raw, src.bytes);
4127 }
4128
4129 return kTfLiteOk;
4130 }
4131
4132 void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded(
4133 const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
4134 int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno) {
4135 // Depending on the operator and the input data format, Dequantize
4136 // operators may need to be added. For example when the input is
4137 // floating-point but weights are quantized then the weights will first be
4138 // dequantized to the same format as the input before being passed to the
4139 // operator.
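// For example, for a float32 Conv2D with quant8 weights, a Dequantize is
// inserted in front of input #1 (and input #2 if the bias is also
// quantized), so the NNAPI operation receives float operands matching its
// input.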
4140
4141 // The tensor determining whether the inputs should be floating-point.
4142 int input_tensor_index = -1;
4143 std::vector<int> inputs_to_potentially_dequantize;
4144
4145 switch (builtin_code) {
4146 case kTfLiteBuiltinConv2d:
4147 case kTfLiteBuiltinFullyConnected: {
4148 input_tensor_index = 0;
4149 // Weights and bias are inputs #1 and #2 respectively and may require
4150 // dequantization.
4151 inputs_to_potentially_dequantize = {1, 2};
4152 break;
4153 }
4154 case kTfLiteBuiltinLstm: {
4155 input_tensor_index = 0;
4156 inputs_to_potentially_dequantize = {1, 2, 3, 4, 5, 6, 7,
4157 8, 9, 10, 11, 12, 13, 14,
4158 15, 16, 17, 20, 21, 22, 23};
4159 break;
4160 }
4161 default:
4162 return;
4163 }
4164
4165 int tensor_id = node->inputs->data[input_tensor_index];
4166 if (tensor_id < 0) return;
4167
4168 // Nothing to do if the input is not floating-point.
4169 if (!IsFloat(context->tensors[tensor_id].type)) return;
4170
4171 for (int i : inputs_to_potentially_dequantize) {
4172 if (i < 0 || i >= node->inputs->size) continue; // Ignore invalid index.
4173 tensor_id = node->inputs->data[i];
4174 if (tensor_id < 0) continue; // Ignore optional input.
4175
4176 const TfLiteType type = context->tensors[tensor_id].type;
4177 // Nothing to do for this tensor if it's not quantized.
4178 if (!IsQuantized(type)) continue;
4179
4180 // Insert Dequantize operator if it hasn't been done already and change
4181 // the node's input accordingly.
4182 builder->AddDequantize(i, node->inputs->data[i], type, tflite_node_index);
4183 }
4184 }
4185
4186 TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
4187 TfLiteContext* context, int* nnapi_errno, bool allow_dynamic_dimensions) {
4188 DequantizeMapping dequantize_mapping;
4189 // The operand builder allows creating a single op. It is created outside
4190 // the for loop to avoid reallocating the vectors.
4191 NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
4192 &dequantize_mapping, &allocation_memory_mapping_,
4193 &nnapi_to_tflite_op_mapping_, nn_model_.get(),
4194 nnapi_errno, allow_dynamic_dimensions);
4195 // If we have target accelerators, the target SDK version might be
4196 // different from the current Android version.
4197 target_sdk_version_ = nnapi_->android_sdk_version;
4198 if (!nnapi_devices_.empty()) {
4199 TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
4200 context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno));
4201 }
4202 // Add Tensors.
4203 for (auto node_index : nodes_) {
4204 // Obtain the op and registration.
4205 TfLiteNode* node;
4206 TfLiteRegistration* reg;
4207 TF_LITE_ENSURE_STATUS(
4208 context->GetNodeAndRegistration(context, node_index, &node, &reg));
4209
4210 // Fully quantized full LSTM.
4211 if (target_sdk_version_ >= kMinSdkVersionForNNAPI13 &&
4212 reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
4213 context->tensors[node->inputs->data[0]].type == kTfLiteInt8) {
4214 const auto quant8_full_lstm_op_code = ANEURALNETWORKS_QUANTIZED_LSTM;
4215
4216 constexpr int kInputTensor = 0;
4217 constexpr int kInputToInputWeightsTensor = 1;
4218 constexpr int kRecurrentToInputWeightsTensor = 5;
4219 constexpr int kInputGateBiasTensor = 12;
4220 constexpr int kForgetGateBiasTensor = 13;
4221 constexpr int kCellGateBiasTensor = 14;
4222 constexpr int kOutputGateBiasTensor = 15;
4223 constexpr int kProjectionWeightsTensor = 16;
4224 constexpr int kProjectionBiasTensor = 17;
4225 constexpr int kPrevOutputTensor = 18;
4226
4227 // Add input tensors.
4228 for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
4229 const auto input_index = node->inputs->data[input_pos];
4230 if (input_index == kTfLiteOptionalTensor) {
4231 if (input_pos == kInputToInputWeightsTensor ||
4232 input_pos == kRecurrentToInputWeightsTensor ||
4233 input_pos == kProjectionWeightsTensor) {
4234 TF_LITE_ENSURE_STATUS(builder.AddVectorInt8Operand(nullptr, 0));
4235 } else if (input_pos == kInputGateBiasTensor ||
4236 input_pos == kForgetGateBiasTensor ||
4237 input_pos == kCellGateBiasTensor ||
4238 input_pos == kOutputGateBiasTensor ||
4239 input_pos == kProjectionBiasTensor) {
4240 TF_LITE_ENSURE_STATUS(builder.AddVectorInt32Operand(nullptr, 0));
4241 } else { // cell-to-* and layer norm weights.
4242 TF_LITE_ENSURE_STATUS(builder.AddVectorInt16Operand(nullptr, 0));
4243 }
4244 } else {
4245 // Only input and previous output use INT8_ASYM_SIGNED.
4246 int flags =
4247 (input_pos == kInputTensor || input_pos == kPrevOutputTensor)
4248 ? NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED
4249 : 0;
4250 TF_LITE_ENSURE_STATUS(
4251 builder.AddTensorInput(input_index, /*hybrid_op=*/false, flags));
4252 }
4253 }
4254
4255 // Add clip parameters.
4256 auto builtin = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
4257 TF_LITE_ENSURE_STATUS(
4258 builder.AddScalarFloat32Operand(builtin->cell_clip));
4259 TF_LITE_ENSURE_STATUS(
4260 builder.AddScalarFloat32Operand(builtin->proj_clip));
4261
4262 // Add quantization parameters for intermediate tensors.
4263 TF_LITE_ENSURE_EQ(context, node->intermediates->size, 5);
4264 for (int intermediate_pos = 0;
4265 intermediate_pos < node->intermediates->size; ++intermediate_pos) {
4266 const auto intermediate_index =
4267 node->intermediates->data[intermediate_pos];
4268 const TfLiteTensor& tensor = context->tensors[intermediate_index];
4269 TfLiteAffineQuantization* quantization_params =
4270 static_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
4271 if (intermediate_pos == 4) {
4272 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4273 quantization_params->zero_point->data[0]));
4274 }
4275 TF_LITE_ENSURE_STATUS(builder.AddScalarFloat32Operand(
4276 quantization_params->scale->data[0]));
4277 }
4278
4279 // Activation state output.
4280 int ann_index;
4281 builder.AddStateInt8AsymTensor(
4282 node->inputs->data[/*kInputActivationStateTensor*/ 18], &ann_index);
4283 model_state_outputs_.push_back(ann_index);
4284 model_state_tfl_inputs_.push_back(
4285 node->inputs->data[/*kInputActivationStateTensor*/ 18]);
4286
4287 // Cell state output.
4288 builder.AddStateInt16Tensor(
4289 node->inputs->data[/*kInputCellStateTensor*/ 19], &ann_index);
4290 model_state_outputs_.push_back(ann_index);
4291 model_state_tfl_inputs_.push_back(
4292 node->inputs->data[/*kInputCellStateTensor*/ 19]);
4293
4294 // Add output tensors.
4295 for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
4296 const auto output_index = node->outputs->data[output_pos];
4297 TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(
4298 output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
4299 }
4300
4301 builder.FinalizeAddOperation(quant8_full_lstm_op_code, node_index);
4302 continue;
4303 }
4304
4305 const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
4306 const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
4307 const bool need_int8_conversion =
4308 target_sdk_version_ < kMinSdkVersionForNNAPI13 &&
4309 NeedInt8Conversion(context, reg->builtin_code, node);
4310 const bool use_int8_asymm_signed =
4311 target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
4312
4313 int input_tensor_flags = 0;
4314 if (scalar_as_tensor) {
4315 input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
4316 }
4317 if (use_int8_asymm_signed) {
4318 input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
4319 }
4320
4321 // On SDK level less than 30, h_swish will be lowered into supported NNAPI
4322 // operations. Since SDK level 30, h_swish is supported as a single
4323 // operation.
4324 if (reg->builtin_code == kTfLiteBuiltinHardSwish &&
4325 nnapi_->android_sdk_version < kMinSdkVersionForNNAPI13) {
4326 builder.TransformHardSwishIntoSupportedOps(
4327 node->inputs->data[0], node->outputs->data[0], need_int8_conversion,
4328 node_index);
4329 continue;
4330 }
4331 // Map inputs to NN API tensor indices.
4332 for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
4333 if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
4334 // Everything is added during Map since the input tensors
4335 // are in a different order.
4336 continue;
4337 }
4338 const auto input_index = node->inputs->data[input_pos];
4339 if (need_int8_conversion &&
4340 (input_pos == 0 ||
4341 reg->builtin_code == kTfLiteBuiltinFullyConnected ||
4342 reg->builtin_code == kTfLiteBuiltinConv2d ||
4343 reg->builtin_code == kTfLiteBuiltinDepthwiseConv2d ||
4344 reg->builtin_code == kTfLiteBuiltinAdd ||
4345 reg->builtin_code == kTfLiteBuiltinMul ||
4346 reg->builtin_code == kTfLiteBuiltinSub ||
4347 reg->builtin_code == kTfLiteBuiltinConcatenation ||
4348 reg->builtin_code == kTfLiteBuiltinMaximum ||
4349 reg->builtin_code == kTfLiteBuiltinMinimum ||
4350 reg->builtin_code == kTfLiteBuiltinLeakyRelu ||
4351 reg->builtin_code == kTfLiteBuiltinLess ||
4352 reg->builtin_code == kTfLiteBuiltinLessEqual ||
4353 reg->builtin_code == kTfLiteBuiltinPrelu ||
4354 reg->builtin_code == kTfLiteBuiltinGreater ||
4355 reg->builtin_code == kTfLiteBuiltinGreaterEqual ||
4356 reg->builtin_code == kTfLiteBuiltinEqual ||
4357 reg->builtin_code == kTfLiteBuiltinNotEqual ||
4358 reg->builtin_code == kTfLiteBuiltinSelect)) {
4359 // Only selected inputs require int8 conversion.
4360 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(
4361 input_index, hybrid_op,
4362 input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION));
4363 continue;
4364 }
4365 if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
4366 input_pos >= 20) {
4367 // Skip layer normalization weights. They are added in the Map
4368 // function (after all the other inputs added there) since layer
4369 // normalization weights are the last four inputs of the LSTM op in
4370 // NNAPI.
4371 continue;
4372 }
4373 if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
4374 // Configuring all inputs in the Map function
4375 continue;
4376 }
4377 if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) {
4378 if (input_pos >= 20) {
4379 // Skip layer normalization weights. They are added in the Map
4380 // function (after all the other inputs added there) since layer
4381 // normalization weights are the last four inputs of the
4382 // unidirectional sequence LSTM op in NNAPI.
4383 continue;
4384 }
4385 if (input_index == kTfLiteOptionalTensor) {
4386 TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
4387 continue;
4388 }
4389 }
4390 if ((reg->builtin_code == kTfLiteBuiltinSplit) &&
4391 (input_index == node->inputs->data[0])) {
4392 // Skip the axis input tensor; it will be added as a scalar operand
4393 // by the Map() mapping.
4394 continue;
4395 }
4396
4397 // Pad and Padv2 have an optional parameter for a pad value which has
4398 // to be converted to a scalar type in NN API.
4399 if ((reg->builtin_code == kTfLiteBuiltinPadv2 ||
4400 reg->builtin_code == kTfLiteBuiltinPad) &&
4401 node->inputs->size == 3 && input_pos == 2) {
4402 const int constant_value_id = node->inputs->data[2];
4403 if (constant_value_id == kTfLiteOptionalTensor) {
4404 continue;
4405 }
4406 const TfLiteTensor constant_value = context->tensors[constant_value_id];
4407
4408 switch (constant_value.type) {
4409 case kTfLiteFloat32:
4410 if (constant_value.allocation_type == kTfLiteMmapRo) {
4411 builder.AddScalarFloat32Operand(*constant_value.data.f);
4412 } else {
4413 builder.AddSingleValueTensorAsScalarOperand(
4414 constant_value_id, ANEURALNETWORKS_FLOAT32);
4415 }
4416 break;
4417 case kTfLiteUInt8:
4418 if (constant_value.allocation_type == kTfLiteMmapRo) {
4419 builder.AddScalarInt32Operand(
4420 static_cast<int32_t>(*constant_value.data.uint8));
4421 } else {
4422 builder.AddSingleValueTensorAsScalarOperand(
4423 constant_value_id, ANEURALNETWORKS_INT32);
4424 }
4425 break;
4426 case kTfLiteInt8:
4427 if (constant_value.allocation_type == kTfLiteMmapRo) {
4428 if (need_int8_conversion) {
4429 builder.AddScalarInt32Operand(
4430 static_cast<int32_t>(*constant_value.data.int8) + 128);
4431 } else {
4432 builder.AddScalarInt32Operand(*constant_value.data.int8);
4433 }
4434 } else {
4435 builder.AddSingleValueTensorAsScalarOperand(
4436 constant_value_id, ANEURALNETWORKS_INT32);
4437 }
4438 break;
4439 default:
4440 context->ReportError(context,
4441 "Unsupported type of pad value for pad_v2\n");
4442 return kTfLiteError;
4443 }
4444 continue;
4445 }
4446
4447 if (input_index == kTfLiteOptionalTensor &&
4448 (reg->builtin_code == kTfLiteBuiltinLstm ||
4449 reg->builtin_code == kTfLiteBuiltinSvdf ||
4450 reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) {
4451 // Properly handle the optional tensors for LSTM and SVDF.
4452 // Currently only float32 is supported.
4453 TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
4454 } else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear ||
4455 reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) {
4456 if (input_pos == 0) {
4457 // Only the first input tensor is added. The second one,
4458 // specifying the output height and width, is not added and
4459 // instead the height and width will be added individually as
4460 // scalars by the mapping function returned by Map().
4461 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4462 input_tensor_flags));
4463 }
4464 } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
4465 // The K parameter tensor is not handled here but by the functor
4466 // returned by Map; the input tensor is instead added in the else
4467 // clause below.
4468 continue;
4469 } else if (reg->builtin_code == kTfLiteBuiltinGather) {
4470 // Everything else is added during Map since the input tensors
4471 // are in a different order.
4472 if (input_pos == 0) {
4473 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4474 input_tensor_flags));
4475 }
4476 continue;
4477 } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
4478 input_pos == 1) {
4479 // The axis param is added during Map
4480 continue;
4481 } else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd &&
4482 input_pos == 2) {
4483 // NNAPI does not support crops.
4484 // The Map function will check if all crops are zero.
4485 continue;
4486 } else if (reg->builtin_code == kTfLiteBuiltinArgMin ||
4487 reg->builtin_code == kTfLiteBuiltinArgMax) {
4488 // The first input tensor is added as is. The second one, specifying
4489 // the axis, needs to be converted to a scalar since TFLite uses a
4490 // tensor but NNAPI uses a scalar as the axis.
4491 if (input_pos == 0) {
4492 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4493 input_tensor_flags));
4494 } else {
4495 const int axis_id = node->inputs->data[1];
4496 const TfLiteTensor& axis_tensor = context->tensors[axis_id];
4497 switch (axis_tensor.type) {
4498 case kTfLiteInt32:
4499 if (axis_tensor.allocation_type == kTfLiteMmapRo) {
4500 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4501 static_cast<int32_t>(*axis_tensor.data.i32)));
4502 } else {
4503 TF_LITE_ENSURE_STATUS(
4504 builder.AddSingleValueTensorAsScalarOperand(
4505 axis_id, ANEURALNETWORKS_INT32));
4506 }
4507 break;
4508 case kTfLiteInt64:
4509 // Map() function already makes sure int64 input is constant.
4510 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4511 static_cast<int32_t>(*axis_tensor.data.i64)));
4512 break;
4513 default:
4514 return kTfLiteError;
4515 }
4516 }
4517 } else if (reg->builtin_code == kTfLiteBuiltinMaximum ||
4518 reg->builtin_code == kTfLiteBuiltinMinimum) {
4519 const TfLiteTensor& operand_tensor =
4520 context->tensors[node->inputs->data[input_pos]];
4521 if (operand_tensor.dims->size == 0) {
4522 int tensor_index;
4523
4524 TF_LITE_ENSURE_EQ(context, operand_tensor.allocation_type,
4525 kTfLiteMmapRo);
4526 switch (operand_tensor.type) {
4527 case kTfLiteFloat32:
4528 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4529 ANEURALNETWORKS_TENSOR_FLOAT32, operand_tensor.type, {1},
4530 std::vector<float>(1, operand_tensor.data.f[0]),
4531 operand_tensor.params, &tensor_index));
4532 break;
4533 case kTfLiteUInt8:
4534 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4535 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, {1},
4536 std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
4537 operand_tensor.params, &tensor_index));
4538 break;
4539 case kTfLiteInt8: {
4540 auto params = operand_tensor.params;
4541 if (params.scale == 0.0) {
4542 params.scale = 1.0;
4543 }
4544
4545 if (use_int8_asymm_signed) {
4546 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4547 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
4548 operand_tensor.type, {1},
4549 std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
4550 &tensor_index));
4551 } else {
4552 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4553 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
4554 {1},
4555 std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
4556 params, &tensor_index));
4557 }
4558 } break;
4559 case kTfLiteInt32:
4560 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
4561 ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
4562 std::vector<int32_t>(1, operand_tensor.data.i32[0]),
4563 operand_tensor.params, &tensor_index));
4564 break;
4565 default:
4566 return kTfLiteError;
4567 }
4568 } else {
4569 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4570 input_tensor_flags));
4571 }
4572 } else if ((reg->builtin_code == kTfLiteBuiltinReduceAny ||
4573 reg->builtin_code == kTfLiteBuiltinReduceMax ||
4574 reg->builtin_code == kTfLiteBuiltinReduceMin ||
4575 reg->builtin_code == kTfLiteBuiltinReduceProd ||
4576 reg->builtin_code == kTfLiteBuiltinSum) &&
4577 (input_pos == 1)) {
4578 // The axis needs to be converted to a tensor if it was specified as a scalar.
4579 const TfLiteTensor& axis_tensor =
4580 context->tensors[node->inputs->data[input_pos]];
4581 if (axis_tensor.dims->size == 0) {
4582 TF_LITE_ENSURE_STATUS(
4583 builder.AddVectorInt32Operand(axis_tensor.data.i32, 1));
4584 } else {
4585 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4586 input_tensor_flags));
4587 }
4588 } else if (reg->builtin_code == kTfLiteBuiltinFill) {
4589 if (input_pos == 0) {
4590 const int dims_id = node->inputs->data[0];
4591 const TfLiteTensor& dims_tensor = context->tensors[dims_id];
4592 switch (dims_tensor.type) {
4593 case kTfLiteInt32:
4594 TF_LITE_ENSURE_STATUS(
4595 builder.AddTensorInput(input_index, hybrid_op));
4596 break;
4597 case kTfLiteInt64: {
4598 // We made sure that dimensions are constant and fit into int32
4599 // in Map(), so we can safely create a new tensor with casted
4600 // values.
4601 const int dims_size = dims_tensor.dims->data[0];
4602 std::vector<int32_t> dims_int32(dims_size);
4603 std::copy(dims_tensor.data.i64, dims_tensor.data.i64 + dims_size,
4604 dims_int32.begin());
4605 int new_tensor_index = -1;
4606 builder.AddNewInputConstantTensor(
4607 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, dims_tensor.dims,
4608 dims_int32, dims_tensor.params, &new_tensor_index);
4609 } break;
4610 default:
4611 return kTfLiteError;
4612 }
4613 } else {
4614 const int value_id = node->inputs->data[1];
4615 const TfLiteTensor& value_tensor = context->tensors[value_id];
4616 switch (value_tensor.type) {
4617 case kTfLiteFloat32:
4618 TF_LITE_ENSURE_STATUS(
4619 builder.AddScalarFloat32Operand(*value_tensor.data.f));
4620 break;
4621 case kTfLiteInt32:
4622 TF_LITE_ENSURE_STATUS(
4623 builder.AddScalarInt32Operand(*value_tensor.data.i32));
4624 break;
4625 case kTfLiteInt64:
4626 // Map() function already makes sure int64 input is constant and
4627 // fits into int32.
4628 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4629 static_cast<int32_t>(*value_tensor.data.i64)));
4630 break;
4631 default:
4632 return kTfLiteError;
4633 }
4634 }
4635 } else {
4636 TF_LITE_ENSURE_STATUS(
4637 builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
4638 }
4639 }
4640
4641 // Get the op type and operands.
4642 // The mapping fails if the Validate function failed.
4643 int nn_op_type;
4644 TF_LITE_ENSURE_STATUS(
4645 Map(context, reg->builtin_code, reg->version, target_sdk_version_,
4646 {context, &builder, node, node_index, &model_state_outputs_,
4647 &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
4648 &nn_op_type));
4649
4650 // Map outputs to NN API tensor indices.
4651 int output_tensor_flags = 0;
4652 if (need_int8_conversion) {
4653 output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
4654 }
4655 if (use_int8_asymm_signed) {
4656 output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
4657 }
4658 for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
4659 const auto output_index = node->outputs->data[output_pos];
4660
4661 // Outputs for the basic LSTM cell are set in the Map function.
4662 if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
4663 continue;
4664 }
4665
4666 TF_LITE_ENSURE_STATUS(
4667 builder.AddTensorOutput(output_index, output_tensor_flags));
4668 }
4669
4670 // Dequantize operators may have to be added when the inputs are
4671 // floating-point.
4672 AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
4673 node_index, &builder, nnapi_errno);
4674
4675 TF_LITE_ENSURE_OK(context_,
4676 builder.FinalizeAddOperation(nn_op_type, node_index));
4677 }
4678 return kTfLiteOk;
4679 }
4680
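// BuildGraph drives the model construction end to end: it adds all ops and
// tensors (AddOpsAndTensors), declares the model inputs/outputs to NNAPI,
// optionally relaxes fp32 computation to fp16, finalizes the model, and
// allocates the shared memory pools used to stage inputs and outputs.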
4681 TfLiteStatus NNAPIDelegateKernel::BuildGraph(
4682 TfLiteContext* context,
4683 const StatefulNnApiDelegate::Options& delegate_options,
4684 const TfLiteIntArray* input_tensors, const TfLiteIntArray* output_tensors,
4685 int* nnapi_errno) {
4686 // Build the ops and tensors.
4687 TF_LITE_ENSURE_STATUS(AddOpsAndTensors(
4688 context, nnapi_errno, delegate_options.allow_dynamic_dimensions));
4689 // Map input and output tensor indices to ANN
4690 std::vector<uint32_t> inputs;
4691 inputs.reserve(input_tensors->size);
4692 std::vector<uint32_t> outputs;
4693 outputs.reserve(output_tensors->size);
4694
4695 size_t total_input_byte_size = 0;
4696 // Map the TensorFlow Lite input and output tensor indices to ANN indices.
4697 for (int i : TfLiteIntArrayView(input_tensors)) {
4698 // Constant tensors are not NNAPI inputs.
4699 if (i != kTfLiteOptionalTensor &&
4700 context->tensors[i].allocation_type != kTfLiteMmapRo &&
4701 // The delegate might not have mapped this input (this can
4702 // happen if one tensor is split into several ones)
4703 operand_mapping_.lite_index_to_ann(i) != -1) {
4704 inputs.push_back(operand_mapping_.lite_index_to_ann(i));
4705 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4706 continue;
4707 }
4708 const TfLiteType nn_type_conversion =
4709 operand_mapping_.lite_index_to_ann_type_conversion(i);
4710 int tensor_size = 0;
4711 if (nn_type_conversion == kTfLiteNoType) {
4712 tensor_size = context->tensors[i].bytes;
4713 } else {
4714 size_t type_size;
4715 TF_LITE_ENSURE_OK(
4716 context, GetSizeOfType(context, nn_type_conversion, &type_size));
4717 tensor_size = NumElements(&context->tensors[i]) * type_size;
4718 }
4719 total_input_byte_size += tensor_size;
4720 total_input_byte_size += getNumPaddingBytes(tensor_size);
4721 }
4722 }
4723
4724 size_t total_output_byte_size = 0;
4725 for (int i : TfLiteIntArrayView(output_tensors)) {
4726 const int output_tensor_ann_index = operand_mapping_.lite_index_to_ann(i);
4727 // Unmapped outputs are not added
4728 if (output_tensor_ann_index != -1) {
4729 outputs.push_back(output_tensor_ann_index);
4730 }
4731 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4732 continue;
4733 }
4734 total_output_byte_size += context->tensors[i].bytes;
4735 total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
4736 }
4737
4738 // Add state output tensors as model outputs.
4739 for (int i : model_state_outputs_) {
4740 outputs.push_back(i);
4741 }
4742
4743 // Tell ANN to declare inputs/outputs
4744 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4745 context,
4746 nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
4747 nn_model_.get(), inputs.size(), inputs.data(), outputs.size(),
4748 outputs.data()),
4749 "identifying model inputs and outputs", nnapi_errno);
4750
4751 auto allow_fp16 =
4752 context->allow_fp32_relax_to_fp16 | delegate_options.allow_fp16;
4753 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
4754 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4755 context,
4756 nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
4757 nn_model_.get(), allow_fp16),
4758 "set relaxed computation mode for fp32 if possible", nnapi_errno);
4759 }
4760
4761 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4762 context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()),
4763 "finalizing the model", nnapi_errno);
4764
4765 // Create shared memory pool for inputs and outputs.
4766 nn_input_memory_.reset(
4767 new NNMemory(nnapi_, "input_pool", total_input_byte_size));
4768 nn_output_memory_.reset(
4769 new NNMemory(nnapi_, "output_pool", total_output_byte_size));
4770
4771 return kTfLiteOk;
4772 }
4773
4774 } // namespace nnapi
4775 } // namespace delegate
4776
4777 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
4778 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI11;
4779 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
4780 using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
4781
4782 StatefulNnApiDelegate::Data::Data(const NnApi* nnapi) : nnapi(nnapi) {}
4783
4784 StatefulNnApiDelegate::Data::~Data() {
4785 std::for_each(std::begin(delegate_state_cache),
4786 std::end(delegate_state_cache),
4787 [](const std::pair<int, NNAPIDelegateKernel*>& entry) {
4788 delete entry.second;
4789 });
4790 }
4791
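// The first node of the partition is used as the cache key: a kernel that
// was already initialized while probing accelerator support in
// GetNodesSupportedByAccelerator (and cached only when the whole partition
// was supported) is picked up again in the delegate kernel's init callback
// instead of being rebuilt.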
4792 void StatefulNnApiDelegate::Data::CacheDelegateKernel(
4793 const TfLiteDelegateParams* delegate_params,
4794 NNAPIDelegateKernel* delegate_state) {
4795 const int cache_key = delegate_params->nodes_to_replace->data[0];
4796 delegate_state_cache.emplace(cache_key, delegate_state);
4797 }
4798
4799 NNAPIDelegateKernel* StatefulNnApiDelegate::Data::MaybeGetCachedDelegateKernel(
4800 const TfLiteDelegateParams* delegate_params) {
4801 const int cache_key = delegate_params->nodes_to_replace->data[0];
4802 const auto cached_state = delegate_state_cache.find(cache_key);
4803 if (cached_state != std::end(delegate_state_cache)) {
4804 auto result = cached_state->second;
4805 delegate_state_cache.erase(cached_state);
4806 return result;
4807 } else {
4808 return nullptr;
4809 }
4810 }
4811
4812 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi)
4813 : StatefulNnApiDelegate(nnapi, Options()) {}
4814
4815 StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
4816 : StatefulNnApiDelegate(NnApiImplementation(), options) {}
4817
4818 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
4819 Options options)
4820 : TfLiteDelegate(TfLiteDelegateCreate()), delegate_data_(nnapi) {
4821 if (options.accelerator_name) {
4822 delegate_data_.accelerator_name = options.accelerator_name;
4823 }
4824 if (options.cache_dir) {
4825 delegate_data_.cache_dir = options.cache_dir;
4826 }
4827 if (options.model_token) {
4828 delegate_data_.model_token = options.model_token;
4829 }
4830 delegate_data_.execution_preference = options.execution_preference;
4831 delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
4832 delegate_data_.max_number_delegated_partitions =
4833 options.max_number_delegated_partitions;
4834 delegate_data_.allow_fp16 = options.allow_fp16;
4835 delegate_data_.execution_priority = options.execution_priority;
4836 delegate_data_.max_compilation_timeout_duration_ns =
4837 options.max_compilation_timeout_duration_ns;
4838 delegate_data_.max_execution_timeout_duration_ns =
4839 options.max_execution_timeout_duration_ns;
4840 delegate_data_.max_execution_loop_timeout_duration_ns =
4841 options.max_execution_loop_timeout_duration_ns;
4842 if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) {
4843 delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions;
4844 }
4845 TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
4846 "Created TensorFlow Lite delegate for NNAPI.");
4847 Prepare = DoPrepare;
4848 CopyFromBufferHandle = DoCopyFromBufferHandle;
4849 CopyToBufferHandle = DoCopyToBufferHandle;
4850 FreeBufferHandle = DoFreeBufferHandle;
4851 data_ = &delegate_data_;
4852 if (delegate_data_.allow_dynamic_dimensions) {
4853 flags |= kTfLiteDelegateFlagsAllowDynamicTensors;
4854 flags |= kTfLiteDelegateFlagsRequirePropagatedShapes;
4855 }
4856 }
4857
4858 StatefulNnApiDelegate::StatefulNnApiDelegate()
4859 : StatefulNnApiDelegate(Options()) {}
4860
4861 const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
4862 TfLiteDelegate* delegate) {
4863 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
4864 StatefulNnApiDelegate::Options options;
4865 options.execution_preference = delegate_data->execution_preference;
4866 options.accelerator_name = delegate_data->accelerator_name.empty()
4867 ? nullptr
4868 : delegate_data->accelerator_name.c_str();
4869 options.cache_dir = delegate_data->cache_dir.empty()
4870 ? nullptr
4871 : delegate_data->cache_dir.c_str();
4872 options.model_token = delegate_data->model_token.empty()
4873 ? nullptr
4874 : delegate_data->model_token.c_str();
4875 options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
4876 options.max_number_delegated_partitions =
4877 delegate_data->max_number_delegated_partitions;
4878 options.allow_fp16 = delegate_data->allow_fp16;
4879 options.execution_priority = delegate_data->execution_priority;
4880 options.max_compilation_timeout_duration_ns =
4881 delegate_data->max_compilation_timeout_duration_ns;
4882 options.max_execution_timeout_duration_ns =
4883 delegate_data->max_execution_timeout_duration_ns;
4884 options.max_execution_loop_timeout_duration_ns =
4885 delegate_data->max_execution_loop_timeout_duration_ns;
4886 options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions;
4887 return options;
4888 }
4889
4890 const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
4891 StatefulNnApiDelegate::GetTensorMemoryMap(TfLiteDelegate* delegate) {
4892 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
4893 return delegate_data->tensor_memory_map;
4894 }
4895
4896 TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory(
4897 ANeuralNetworksMemory* memory, CopyToHostTensorFnPtr callback,
4898 void* callback_context) {
4899 int map_size = delegate_data_.tensor_memory_map.size();
4900 for (int i = 0; i < map_size; i++) {
4901 if (delegate_data_.tensor_memory_map[i].memory == nullptr) {
4902 delegate_data_.tensor_memory_map[i] = {memory, callback,
4903 callback_context};
4904 return i;
4905 }
4906 }
4907 delegate_data_.tensor_memory_map.push_back(
4908 {memory, callback, callback_context});
4909 return map_size;
4910 }
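// Example (sketch only; `interpreter`, `nn_memory`, `copy_to_host_fn` and
// `tensor_index` are assumed to exist in the caller):
//
//   auto handle = delegate->RegisterNnapiMemory(nn_memory, copy_to_host_fn,
//                                               /*callback_context=*/nullptr);
//   interpreter->SetBufferHandle(tensor_index, handle, delegate);
//
// The returned handle either reuses a freed slot or appends a new entry, so
// it remains a valid index into tensor_memory_map.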
4911
4912 TfLiteStatus StatefulNnApiDelegate::DoCopyFromBufferHandle(
4913 TfLiteContext* context, TfLiteDelegate* delegate,
4914 TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
4915 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
4916 if (buffer_handle < 0 ||
4917 buffer_handle >= delegate_data->tensor_memory_map.size()) {
4918 return kTfLiteError;
4919 }
4920 auto memory = delegate_data->tensor_memory_map[buffer_handle].memory;
4921 auto callback = delegate_data->tensor_memory_map[buffer_handle].callback;
4922 auto callback_context =
4923 delegate_data->tensor_memory_map[buffer_handle].callback_context;
4924 if (!memory || !callback) {
4925 return kTfLiteError;
4926 }
4927 return callback(tensor, memory, 0, tensor->bytes, callback_context);
4928 }
4929
4930 TfLiteStatus StatefulNnApiDelegate::DoCopyToBufferHandle(
4931 TfLiteContext* context, TfLiteDelegate* delegate,
4932 TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
4933 return kTfLiteError;
4934 }
4935
4936 void StatefulNnApiDelegate::DoFreeBufferHandle(TfLiteContext* context,
4937 TfLiteDelegate* delegate,
4938 TfLiteBufferHandle* handle) {
4939 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
4940 if (*handle >= 0 && *handle < delegate_data->tensor_memory_map.size()) {
4941 delegate_data->tensor_memory_map[*handle] = {nullptr, nullptr, nullptr};
4942 *handle = kTfLiteNullBufferHandle;
4943 }
4944 }
4945
4946 int StatefulNnApiDelegate::GetNnApiErrno() const {
4947 return delegate_data_.nnapi_errno;
4948 }
4949
4950 // static
4951 TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
4952 TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
4953 const std::vector<int>& supported_nodes,
4954 std::vector<int>* device_supported_nodes, int* num_partitions,
4955 TfLiteDelegateParams** params_array, int* nnapi_errno) {
4956 auto* delegate_data = static_cast<Data*>(delegate->data_);
4957 // The first entry in the array is the element count
4958
4959 auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
4960 TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
4961 context, supported_nodes_int_array.get(), params_array, num_partitions));
4962 // For each partition, check which nodes are actually supported by the
4963 // target accelerators.
4964 delegate_data->delegate_state_cache.clear();
4965 for (int idx = 0; idx < *num_partitions; idx++) {
4966 const auto& partition_params = (*params_array)[idx];
4967 std::unique_ptr<NNAPIDelegateKernel> kernel_state(
4968 new NNAPIDelegateKernel(nnapi));
4969 TfLiteDelegateParams params_with_delegate = partition_params;
4970 params_with_delegate.delegate = delegate;
4971 TF_LITE_ENSURE_STATUS(
4972 kernel_state->Init(context, &params_with_delegate, nnapi_errno));
4973 std::vector<int> supported_partition_nodes;
4974 TF_LITE_ENSURE_STATUS(
4975 kernel_state->GetOperationsSupportedByTargetNnApiDevices(
4976 context, &supported_partition_nodes, nnapi_errno));
4977 device_supported_nodes->insert(device_supported_nodes->end(),
4978 supported_partition_nodes.begin(),
4979 supported_partition_nodes.end());
4980
4981 bool model_fully_supported = (supported_partition_nodes.size() ==
4982 partition_params.nodes_to_replace->size);
4983 if (model_fully_supported) {
4984 delegate_data->CacheDelegateKernel(&partition_params,
4985 kernel_state.release());
4986 }
4987 }
4988
4989 if (device_supported_nodes->size() != supported_nodes.size()) {
4990 // We changed the set of nodes to delegate; this will create a different
4991 // partitioning layout.
4992 auto device_sup_nodes_int_array =
4993 BuildTfLiteIntArray(*device_supported_nodes);
4994 TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
4995 context, device_sup_nodes_int_array.get(), params_array,
4996 num_partitions));
4997 }
4998
4999 return kTfLiteOk;
5000 }
5001
5002 // static
5003 TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
5004 int max_partitions,
5005 std::vector<TfLiteDelegateParams> partition_params_array,
5006 std::vector<int>* nodes_to_delegate) {
5007 int num_partitions = partition_params_array.size();
5008 if (max_partitions <= 0 || num_partitions <= max_partitions) {
5009 return kTfLiteOk;
5010 }
5011
5012 int number_delegated_partitions = std::count_if(
5013 partition_params_array.begin(), partition_params_array.end(),
5014 [nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
5015 return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
5016 partition_params.nodes_to_replace->data[0]) !=
5017 nodes_to_delegate->end();
5018 });
5019
5020 if (number_delegated_partitions > max_partitions) {
5021 std::sort(partition_params_array.begin(), partition_params_array.end(),
5022 [](const TfLiteDelegateParams& left,
5023 const TfLiteDelegateParams& right) -> bool {
5024 // Reverse sort
5025 return left.nodes_to_replace->size >
5026 right.nodes_to_replace->size;
5027 });
5028
5029 nodes_to_delegate->clear();
5030
5031 for (int i = 0; i < max_partitions; i++) {
5032 const TfLiteDelegateParams& partition_params = partition_params_array[i];
5033
5034 nodes_to_delegate->insert(nodes_to_delegate->end(),
5035 partition_params.nodes_to_replace->data,
5036 partition_params.nodes_to_replace->data +
5037 partition_params.nodes_to_replace->size);
5038 }
5039 }
5040
5041 return kTfLiteOk;
5042 }
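// For example, with max_partitions == 2 and delegable partitions of sizes
// {10, 3, 1}, the reverse sort keeps only the nodes of the two largest
// partitions (10 and 3) in nodes_to_delegate.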
5043
5044 TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
5045 TfLiteDelegate* delegate) {
5046 auto* delegate_data = static_cast<Data*>(delegate->data_);
5047 int* nnapi_errno = &(delegate_data->nnapi_errno);
5048 const NnApi* nnapi = delegate_data->nnapi;
5049
5050 // Reset the error code when the delegate is initialized by TFLite. This
5051 // causes the error to be cleared when the same StatefulNnApiDelegate is
5052 // reused after a failure.
5053 *nnapi_errno = 0;
5054
5055 // Do not check nodes_ if NN API is unavailable.
5056 if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
5057 !nnapi->nnapi_exists) {
5058 return kTfLiteOk;
5059 }
5060
5061 int target_sdk_version = nnapi->android_sdk_version;
5062 const StatefulNnApiDelegate::Options delegate_options =
5063 StatefulNnApiDelegate::GetOptions(delegate);
5064 // For NNAPI 1.2+, check if there is any accelerator available.
5065 // If not, don't delegate to NNAPI's CPU reference implementation unless
5066 // it has been specified as target accelerator.
5067 if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
5068 if (ShouldUseTargetDevices(delegate_options, nnapi)) {
5069 std::vector<ANeuralNetworksDevice*> devices;
5070 TF_LITE_ENSURE_STATUS(
5071 GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
5072
5073 if (devices.empty()) {
5074 if (delegate_options.accelerator_name) {
5075 // There was a selected device and it is not available.
5076 return kTfLiteError;
5077 } else {
5078 // Only nnapi-reference is available but was disabled by the delegate
5079 // options
5080 return kTfLiteOk;
5081 }
5082 }
5083
5084 TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
5085 context, nnapi, devices, &target_sdk_version, nnapi_errno));
5086 } else {
5087 // If no accelerator is specified, only use NNAPI if an accelerator is
5088 // available. Any available accelerator will make the device_count larger
5089 // than 1. More sophisticated check and allowlisting can be added later.
5090 uint32_t device_count = 0;
5091 RETURN_TFLITE_ERROR_IF_NN_ERROR(
5092 context, nnapi->ANeuralNetworks_getDeviceCount(&device_count),
5093 "getting number of NNAPI devices", nnapi_errno);
5094 if (device_count <= 1) {
5095 return kTfLiteOk;
5096 }
5097 }
5098 }
5099
5100 std::vector<int> supported_nodes;
5101 // We don't care about all nodes_, we only care about ones in the
5102 // current plan.
5103 TfLiteIntArray* plan;
5104 TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
5105
5106 // Check for every node if it is supported
5107 const bool is_accelerator_specified = ShouldUseTargetDevices(
5108 delegate_options, nnapi, /*exclude_nnapi_reference=*/true);
5109 for (int node_index : TfLiteIntArrayView(plan)) {
5110 TfLiteNode* node;
5111 TfLiteRegistration* registration;
5112 TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5113 context, node_index, &node, &registration));
5114 if (NNAPIDelegateKernel::Validate(context, registration->builtin_code,
5115 registration->version, target_sdk_version,
5116 node, is_accelerator_specified)) {
5117 supported_nodes.push_back(node_index);
5118 }
5119 }
5120
5121 // If there are no delegated nodes, short-circuit node replacement.
5122 if (supported_nodes.empty()) {
5123 return kTfLiteOk;
5124 }
5125
5126 // NN API delegate registration (the pseudo kernel that will invoke NN
5127 // API node subsets).
5128 static const TfLiteRegistration nnapi_delegate_kernel = {
5129 .init = [](TfLiteContext* context, const char* buffer,
5130 size_t length) -> void* {
5131 const TfLiteDelegateParams* params =
5132 reinterpret_cast<const TfLiteDelegateParams*>(buffer);
5133
5134 auto* delegate_data = static_cast<Data*>(params->delegate->data_);
5135 int* nnapi_errno = &(delegate_data->nnapi_errno);
5136
5137 NNAPIDelegateKernel* kernel_state =
5138 delegate_data->MaybeGetCachedDelegateKernel(params);
5139 if (!kernel_state) {
5140 kernel_state = new NNAPIDelegateKernel(delegate_data->nnapi);
5141 kernel_state->Init(context, params, nnapi_errno);
5142 }
5143
5144 return kernel_state;
5145 },
5146
5147 .free = [](TfLiteContext* context, void* buffer) -> void {
5148 delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
5149 },
5150
5151 .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
5152 NNAPIDelegateKernel* state =
5153 reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
5154 int* nnapi_errno =
5155 &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
5156 return state->Prepare(context, node, nnapi_errno);
5157 },
5158
5159 .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
5160 NNAPIDelegateKernel* state =
5161 reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
5162 int* nnapi_errno =
5163 &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
5164 return state->Invoke(context, node, nnapi_errno);
5165 },
5166
5167 .profiling_string = nullptr,
5168 .builtin_code = kTfLiteBuiltinDelegate,
5169 .custom_name = "TfLiteNnapiDelegate",
5170 .version = 1,
5171 };
5172
5173 std::vector<int> nodes_to_delegate;
5174
5175 int num_partitions;
5176 TfLiteDelegateParams* params_array;
5177 if (is_accelerator_specified &&
5178 nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
5179 // Filter out nodes not supported by the target accelerators.
5180 // Supported operations cannot be queried before NNAPI 1.2.
5181 TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
5182 context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
5183 &num_partitions, &params_array, nnapi_errno));
5184 } else {
5185 nodes_to_delegate = supported_nodes;
5186 auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
5187 TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
5188 context, supported_nodes_int_array.get(), &params_array,
5189 &num_partitions));
5190 }
5191
5192 TF_LITE_ENSURE_STATUS(
5193 LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions,
5194 std::vector<TfLiteDelegateParams>(
5195 params_array, params_array + num_partitions),
5196 &nodes_to_delegate));
5197
5198 if (nodes_to_delegate.empty()) {
5199 return kTfLiteOk;
5200 } else {
5201 // Request TFLite to partition the graph and create a new
5202 // nnapi_delegate_kernel for each independent node subset.
5203 auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);
5204 return context->ReplaceNodeSubsetsWithDelegateKernels(
5205 context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
5206 delegate);
5207 }
5208 }
5209
5210 // Returns a singleton NNAPI Delegate that can check for support of ops.
5211 TfLiteDelegate* NnApiDelegate() {
5212 static StatefulNnApiDelegate* delegate = new StatefulNnApiDelegate();
5213 return delegate;
5214 }
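// Typical client usage (sketch only, not part of this file; `interpreter`
// is assumed to be a tflite::Interpreter owned by the caller):
//
//   tflite::StatefulNnApiDelegate::Options options;
//   options.execution_preference =
//       tflite::StatefulNnApiDelegate::Options::kSustainedSpeed;
//   tflite::StatefulNnApiDelegate delegate(options);
//   if (interpreter->ModifyGraphWithDelegate(&delegate) != kTfLiteOk) {
//     // Fall back to running the graph on the TFLite CPU kernels.
//   }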
5215
5216 } // namespace tflite
5217