/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_

#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace delegate {
namespace nnapi {

constexpr int32_t kMinSdkVersionForNNAPI = 27;
constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
constexpr int32_t kMinSdkVersionForNNAPI13 = 30;

// Tracks the mapping from TFLite tensor indices to NN API tensor indices.
class OperandMapping {
 public:
  // Given a TFLite index, returns the ANN index. If it doesn't exist,
  // returns -1.
  int lite_index_to_ann(int index) const {
    const int64_t max_size = lite_tensor_to_ann_tensor_.size();
    if (index >= 0 && index < max_size)
      return lite_tensor_to_ann_tensor_[index];
    else
      return -1;
  }

  // NN API uses non-tensor operands instead of structs. This creates one
  // and returns its index. The new operand is not added to the
  // TFLite-to-ANN tensor mapping; intermediate tensors likely will not
  // be mapped.
  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }

  // This call is necessary for input operands generated by the delegate
  // to map constant inputs not present in TFLite but required by NNAPI,
  // for example when splitting one input into several ones.
  int add_delegate_generated_input_ann_tensors_operand() {
    return next_ann_tensor_index_++;
  }

  // Add a new mapping from `tflite_index` and return the NN API tensor index.
  int add_new_ann_tensor_index(int tflite_index) {
    const int64_t current_size = lite_tensor_to_ann_tensor_.size();
    if (tflite_index >= current_size) {
      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
    }
    const int new_tensor_index = next_ann_tensor_index_++;
    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
    return new_tensor_index;
  }

  // Given a TFLite index, returns the TFLite type to which the tensor must be
  // converted while copying its data to the memory allocated for NN API.
  // kTfLiteNoType means no conversion is needed.
  TfLiteType lite_index_to_ann_type_conversion(int index) const {
    const int64_t max_size = index_to_type_conversion_.size();
    if (index >= 0 && index < max_size)
      return index_to_type_conversion_[index];
    else
      return kTfLiteNoType;
  }

  // Add a new mapping from TFLite index to a type conversion.
  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
    const int64_t current_size = index_to_type_conversion_.size();
    if (tflite_index >= current_size) {
      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
    }
    index_to_type_conversion_[tflite_index] = tflite_type;
  }

 private:
  // Next available ANN tensor index.
  int next_ann_tensor_index_ = 0;

  // Mapping from lite index. Use a std::vector for speed and code size
  // rather than a map.
  std::vector<int> lite_tensor_to_ann_tensor_;
  // Mapping from lite index to the type the tensor must be converted to while
  // copying its data to the memory allocated for NN API. kTfLiteNoType
  // means no conversion is needed. Use a std::vector for speed and code size
  // rather than a map.
  std::vector<TfLiteType> index_to_type_conversion_;
};
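
// Illustrative sketch (not part of the API): how the mapping above behaves
// when registering TFLite tensors and querying them back. The tensor indices
// used here are hypothetical.
//
//   OperandMapping mapping;
//   // Registering TFLite tensor 3 first assigns it ANN operand 0.
//   int ann_a = mapping.add_new_ann_tensor_index(/*tflite_index=*/3);  // 0
//   // A delegate-generated scalar operand consumes the next ANN index (1)
//   // without appearing in the TFLite-to-ANN tensor mapping.
//   int scalar = mapping.add_new_non_tensor_operand();                 // 1
//   int ann_b = mapping.add_new_ann_tensor_index(/*tflite_index=*/1);  // 2
//   mapping.lite_index_to_ann(3);  // returns 0
//   mapping.lite_index_to_ann(2);  // unmapped, returns -1
//   // Record that tensor 1 must be converted to float32 when its data is
//   // copied into the memory allocated for NN API.
//   mapping.add_type_conversion(/*tflite_index=*/1, kTfLiteFloat32);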

class NNAPIOpBuilder;

// Arguments passed to the op mapping functions when translating a TFLite node
// into an NNAPI operation.
struct NNAPIOpMappingArgs {
  TfLiteContext* context;
  NNAPIOpBuilder* builder;
  TfLiteNode* node;
  int node_index;
  std::vector<int>* model_state_outputs;
  std::vector<int>* model_state_tfl_inputs;
  std::vector<std::tuple<int, int>>* feedback_loops;
  int* nnapi_errno;
};

// RAII NN API Model Destructor for use with std::unique_ptr
class NNFreeModel {
 public:
  explicit NNFreeModel(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksModel* model) {
    nnapi_->ANeuralNetworksModel_free(model);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Compilation Destructor for use with std::unique_ptr
class NNFreeCompilation {
 public:
  explicit NNFreeCompilation(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksCompilation* compilation) {
    nnapi_->ANeuralNetworksCompilation_free(compilation);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Execution Destructor for use with std::unique_ptr
class NNFreeExecution {
 public:
  explicit NNFreeExecution(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksExecution* execution) {
    nnapi_->ANeuralNetworksExecution_free(execution);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
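
// Illustrative sketch (not part of the API): the deleters above plug into
// std::unique_ptr so the underlying NNAPI objects are released automatically.
// ANeuralNetworksModel_create is the standard NNAPI entry point exposed
// through the NnApi struct; error handling is omitted for brevity.
//
//   const NnApi* nnapi = NnApiImplementation();
//   ANeuralNetworksModel* raw_model = nullptr;
//   nnapi->ANeuralNetworksModel_create(&raw_model);
//   std::unique_ptr<ANeuralNetworksModel, NNFreeModel> model(
//       raw_model, NNFreeModel(nnapi));
//   // When `model` goes out of scope, ANeuralNetworksModel_free is called.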

// Manage NNAPI shared memory handle
class NNMemory {
 public:
  NNMemory(const NnApi* nnapi, const char* name, size_t size);

  ~NNMemory();

  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
  uint8_t* get_data_ptr() { return data_ptr_; }
  size_t get_byte_size() { return byte_size_; }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
  int fd_ = 0;
  size_t byte_size_ = 0;
  uint8_t* data_ptr_ = nullptr;
  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
};
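
// Illustrative sketch (not part of the API): one possible way to stage input
// data through an NNMemory pool and pass its handle to NNAPI. The pool name,
// size, and the `execution`, `offset`, `tensor_data`, and `bytes` variables
// are hypothetical and assumed to be supplied by the caller.
//
//   auto input_memory =
//       std::make_unique<NNMemory>(nnapi, "input_pool", /*size=*/4096);
//   std::memcpy(input_memory->get_data_ptr() + offset, tensor_data, bytes);
//   nnapi->ANeuralNetworksExecution_setInputFromMemory(
//       execution, /*index=*/0, /*type=*/nullptr, input_memory->get_handle(),
//       offset, bytes);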

// LINT.IfChange
enum class NNAPIValidationFailureType : int {
  // The operator is not supported by either NNAPI or the NNAPI Delegate.
  kUnsupportedOperator = 0,
  // The given operation or operands are not supported on the specified
  // Android SDK version. The min supported version is specified in the
  // validation failure message.
  kUnsupportedAndroidVersion = 1,
  // The version of the operator (value of TfLiteRegistration::version)
  // for the given op is not supported. The max supported version
  // is specified in the validation failure message.
  // For more details on each operator version see
  // the GetBuiltinOperatorVersion function in
  // third_party/tensorflow/lite/tools/versioning/op_version.cc.
  kUnsupportedOperatorVersion = 2,
  // The given input operand type is not supported for the current combination
  // of operator type and SDK version.
  kUnsupportedInputType = 3,
  // When using NN API version 1.0 or 1.1, the condition
  //   input_scale * filter_scale < output_scale
  // must be true for quantized versions of the following ops:
  // * CONV_2D
  // * DEPTHWISE_CONV_2D
  // * FULLY_CONNECTED (where filter actually stands for weights)
  // The condition is relaxed and no longer required since version 1.2.
  kNotRestrictedScaleCompliant = 4,
  // The given output operand type is not supported for the current combination
  // of operator type and SDK version.
  kUnsupportedOutputType = 5,
  // The size of the operand tensor is too large.
  kUnsupportedOperandSize = 6,
  // The value of one of the operands or of a combination of operands is
  // not supported. Details are provided in the failure message.
  kUnsupportedOperandValue = 7,
  // The combination of float inputs and quantized weights or filters
  // is not supported.
  kUnsupportedHybridOperator = 8,
  // The quantization type (for example per-channel quantization) is not
  // supported.
  kUnsupportedQuantizationType = 9,
  // The accelerated version of the operation requires a specific operand to be
  // specified.
  kMissingRequiredOperand = 10,
  // The rank of the operand is not supported. Details in the failure message.
  kUnsupportedOperandRank = 11,
  // The input tensor cannot be dynamically sized.
  kInputTensorShouldHaveConstantShape = 12,
  // The operator has a number of inputs different from the one(s) supported
  // by NNAPI.
  kUnsupportedOperatorVariant = 13,
  // The accelerated version of the operator cannot specify an activation
  // function.
  kNoActivationExpected = 14,
  // Quantization scale and/or zero point are not in the supported value(s)
  // for the accelerated operation.
  kUnsupportedQuantizationParameters = 15,
};
// LINT.ThenChange(nnapi_linter/linter.proto)

struct NNAPIValidationFailure {
  NNAPIValidationFailureType type;
  std::string message;

  NNAPIValidationFailure(NNAPIValidationFailureType type, const char* message)
      : type(type), message(message) {}
};

// The kernel that represents the subset of TF Lite nodes being run on NN API.
class NNAPIDelegateKernel {
 public:
  explicit NNAPIDelegateKernel(const NnApi* nnapi)
      : initialised_(false),
        nnapi_(nnapi),
        nn_model_(nullptr, NNFreeModel(nnapi_)),
        nn_compilation_(nullptr, NNFreeCompilation(nnapi_)) {}
  NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {}
  ~NNAPIDelegateKernel() {
    for (auto content : allocation_memory_mapping_) {
      nnapi_->ANeuralNetworksMemory_free(content.second);
    }
  }

  // Translates a TFLite node into the corresponding NNAPI operation.
  // It assumes that the call to Validate for the operation has been
  // successful.
  // In case of success it returns kTfLiteOk and stores the NNAPI operation
  // code in nn_op_type.
  // Returns kTfLiteError in case of failures during mapping.
  static TfLiteStatus Map(TfLiteContext* context, int builtin_code, int version,
                          int android_sdk_version,
                          const NNAPIOpMappingArgs& mapping_args,
                          ANeuralNetworksOperationType* nn_op_type);

  // Returns true if the node can be accelerated with NNAPI.
  static bool Validate(
      const TfLiteContext* context, int builtin_code, int version,
      int android_sdk_version, const TfLiteNode* node,
      bool is_accelerator_specified,
      // Collects the failures found while checking whether the given node
      // can be accelerated.
      std::vector<NNAPIValidationFailure>* map_failures = nullptr);
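
  // Illustrative sketch (not part of the API): how a caller might collect and
  // log validation failures for a node. `registration`, `node`, and
  // `android_sdk_version` are assumed to come from the usual
  // GetNodeAndRegistration lookup and the NnApi runtime properties.
  //
  //   std::vector<NNAPIValidationFailure> failures;
  //   const bool supported = NNAPIDelegateKernel::Validate(
  //       context, registration->builtin_code, registration->version,
  //       android_sdk_version, node, /*is_accelerator_specified=*/true,
  //       &failures);
  //   if (!supported) {
  //     for (const auto& failure : failures) {
  //       TF_LITE_KERNEL_LOG(context, "NNAPI rejection: %s",
  //                          failure.message.c_str());
  //     }
  //   }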

  // Initializes the kernel and builds the NN model.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params,
                    int* nnapi_errno);

  // Creates the NNAPI compilation for the NN model. It assumes that Init has
  // been called and completed successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node,
                       int* nnapi_errno);

  // Invokes the NN model. Expects Init and Prepare to have been completed
  // successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node,
                      int* nnapi_errno);

  // Returns the list of operations supported by the current NNAPI model as
  // built in Prepare. Every operation is identified by the index as provided
  // in the delegate parameters given to the delegate during the Init call.
  // It expects that the Init method has been called and completed successfully
  // and returns kTfLiteError if not. It also returns an error if any of the
  // NNAPI calls fails or if the
  // ANeuralNetworksModel_getSupportedOperationsForDevices function is not
  // available in the NnApi object.
  TfLiteStatus GetOperationsSupportedByTargetNnApiDevices(
      TfLiteContext* context, std::vector<int>* supported_nodes,
      int* nnapi_errno);
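
  // Illustrative sketch (not part of the API): the lifecycle a delegate
  // typically drives on this kernel. `params`, `node`, and the surrounding
  // function are assumed to be the delegate's Init/Prepare/Invoke callbacks;
  // error handling is shortened to early returns.
  //
  //   auto kernel = std::make_unique<NNAPIDelegateKernel>();
  //   int nnapi_errno = 0;
  //   if (kernel->Init(context, params, &nnapi_errno) != kTfLiteOk)
  //     return kTfLiteError;  // builds the NN model
  //   if (kernel->Prepare(context, node, &nnapi_errno) != kTfLiteOk)
  //     return kTfLiteError;  // creates the NNAPI compilation
  //   if (kernel->Invoke(context, node, &nnapi_errno) != kTfLiteOk)
  //     return kTfLiteError;  // runs one inference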

 private:
  // True if initialization has been completed successfully.
  bool initialised_;
  // Access to NNApi.
  const NnApi* nnapi_;
  // ANN device handle.
  std::vector<ANeuralNetworksDevice*> nnapi_devices_;
  // Name of the nnapi device, empty if nnapi_devices_ is empty.
  std::string device_name_;
  // ANN API state.
  std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
  std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
      nn_compilation_;
  // Node indices that this delegate is responsible for. Indices here
  // index into the nodes array in the TfLiteContext.
  std::vector<int> nodes_;
  // Track indices we use.
  OperandMapping operand_mapping_;
  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
      allocation_memory_mapping_;
  // Track memory map.
  const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
      tensor_memory_map_;
  std::vector<int> model_state_outputs_;
  std::vector<int> model_state_tfl_inputs_;
  // This is the equivalent of the pair model_state_outputs_,
  // model_state_tfl_inputs_ for all tensors where we have to keep the output
  // data available for TFLite model users.
  std::vector<std::tuple<int, int>> feedback_loops_;

  std::unique_ptr<NNMemory> nn_input_memory_;
  std::unique_ptr<NNMemory> nn_output_memory_;

  std::vector<uint8_t> nn_compilation_cache_token_;

  std::vector<int> nnapi_to_tflite_op_mapping_;

  // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors.
  int target_sdk_version_ = 27;  // kMinSdkVersionForNNAPI

  void AddDequantizeOperatorsWhereNeeded(
      const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
      int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);

  TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno,
                                bool allow_dynamic_dimensions);

  TfLiteStatus BuildGraph(TfLiteContext* context,
                          const StatefulNnApiDelegate::Options& options,
                          const TfLiteIntArray* input_tensors,
                          const TfLiteIntArray* output_tensors,
                          int* nnapi_errno);
};

}  // namespace nnapi
}  // namespace delegate
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_