1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_ 17 #define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_ 18 19 #include <map> 20 #include <memory> 21 22 #include "tensorflow/lite/allocation.h" 23 #include "tensorflow/lite/c/common.h" 24 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" 25 #include "tensorflow/lite/nnapi/nnapi_implementation.h" 26 27 namespace tflite { 28 namespace delegate { 29 namespace nnapi { 30 31 constexpr int32_t kMinSdkVersionForNNAPI = 27; 32 constexpr int32_t kMinSdkVersionForNNAPI11 = 28; 33 constexpr int32_t kMinSdkVersionForNNAPI12 = 29; 34 constexpr int32_t kMinSdkVersionForNNAPI13 = 30; 35 36 // Track tensor indices to NN API tensor indices mapping. 37 class OperandMapping { 38 public: 39 // Given a TFLite index return the ANN index. If it doesn't exist 40 // return -1. lite_index_to_ann(int index)41 int lite_index_to_ann(int index) const { 42 const int64_t max_size = lite_tensor_to_ann_tensor_.size(); 43 if (index >= 0 && index < max_size) 44 return lite_tensor_to_ann_tensor_[index]; 45 else 46 return -1; 47 } 48 49 // NN API uses non tensor operands instead of structs. This creates one 50 // and returns the index. It uses a std::vector and resizes it as needed 51 // keeping -1 to unmapped values. Intermediate tensors likely will not 52 // be mapped. add_new_non_tensor_operand()53 int add_new_non_tensor_operand() { return next_ann_tensor_index_++; } 54 55 // This call is necessary for input operands generated by the delegate 56 // to map constant inputs not present in TFLite but required by NNAPI, 57 // for example when splitting one input in several ones. add_delegate_generated_input_ann_tensors_operand()58 int add_delegate_generated_input_ann_tensors_operand() { 59 return next_ann_tensor_index_++; 60 } 61 62 // Add a new mapping from `tflite_index` and return the NN API tensor index. add_new_ann_tensor_index(int tflite_index)63 int add_new_ann_tensor_index(int tflite_index) { 64 const int64_t current_size = lite_tensor_to_ann_tensor_.size(); 65 if (tflite_index >= current_size) { 66 lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1); 67 } 68 const int new_tensor_index = next_ann_tensor_index_++; 69 lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index; 70 return new_tensor_index; 71 } 72 73 // Given a TFLite index returns a TFLite type to which a tensor must be 74 // converted during copying the data to the memory allocated for NN API. 75 // kTfLiteNoType means no conversion is needed. lite_index_to_ann_type_conversion(int index)76 TfLiteType lite_index_to_ann_type_conversion(int index) const { 77 const int64_t max_size = index_to_type_conversion_.size(); 78 if (index >= 0 && index < max_size) 79 return index_to_type_conversion_[index]; 80 else 81 return kTfLiteNoType; 82 } 83 84 // Add a new mapping from TFLite index to a type conversion. add_type_conversion(int tflite_index,TfLiteType tflite_type)85 void add_type_conversion(int tflite_index, TfLiteType tflite_type) { 86 const int64_t current_size = index_to_type_conversion_.size(); 87 if (tflite_index >= current_size) { 88 index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType); 89 } 90 index_to_type_conversion_[tflite_index] = tflite_type; 91 } 92 93 private: 94 // Next index of ann tensor 95 int next_ann_tensor_index_ = 0; 96 97 // Mapping from lite index. Use a std::vector for speed and code size 98 // rather than a map. 99 std::vector<int> lite_tensor_to_ann_tensor_; 100 // Mapping from lite index to a type which tensor must be converted to during 101 // the copying of the data to the memory allocated for NN API. kTfLiteNoType 102 // means no conversion is needed. Use an std::vector for speed and code size 103 // rather than a map. 104 std::vector<TfLiteType> index_to_type_conversion_; 105 }; 106 107 class NNAPIOpBuilder; 108 109 // The kernel that represents the node sub set of TF Lite being run on NN API. 110 struct NNAPIOpMappingArgs { 111 TfLiteContext* context; 112 NNAPIOpBuilder* builder; 113 TfLiteNode* node; 114 int node_index; 115 std::vector<int>* model_state_outputs; 116 std::vector<int>* model_state_tfl_inputs; 117 std::vector<std::tuple<int, int>>* feedback_loops; 118 int* nnapi_errno; 119 }; 120 121 // RAII NN API Model Destructor for use with std::unique_ptr 122 class NNFreeModel { 123 public: NNFreeModel(const NnApi * nnapi)124 explicit NNFreeModel(const NnApi* nnapi) : nnapi_(nnapi) {} operator()125 void operator()(ANeuralNetworksModel* model) { 126 nnapi_->ANeuralNetworksModel_free(model); 127 } 128 129 private: 130 // NnApi instance to use. Not owned by this object. 131 const NnApi* nnapi_; 132 }; 133 // RAII NN API Compilation Destructor for use with std::unique_ptr 134 class NNFreeCompilation { 135 public: NNFreeCompilation(const NnApi * nnapi)136 explicit NNFreeCompilation(const NnApi* nnapi) : nnapi_(nnapi) {} operator()137 void operator()(ANeuralNetworksCompilation* model) { 138 nnapi_->ANeuralNetworksCompilation_free(model); 139 } 140 141 private: 142 // NnApi instance to use. Not owned by this object. 143 const NnApi* nnapi_; 144 }; 145 // RAII NN API Execution Destructor for use with std::unique_ptr 146 class NNFreeExecution { 147 public: NNFreeExecution(const NnApi * nnapi)148 explicit NNFreeExecution(const NnApi* nnapi) : nnapi_(nnapi) {} operator()149 void operator()(ANeuralNetworksExecution* execution) { 150 nnapi_->ANeuralNetworksExecution_free(execution); 151 } 152 153 private: 154 // NnApi instance to use. Not owned by this object. 155 const NnApi* nnapi_; 156 }; 157 158 // Manage NNAPI shared memory handle 159 class NNMemory { 160 public: 161 NNMemory(const NnApi* nnapi, const char* name, size_t size); 162 163 ~NNMemory(); 164 get_handle()165 ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; } get_data_ptr()166 uint8_t* get_data_ptr() { return data_ptr_; } get_byte_size()167 size_t get_byte_size() { return byte_size_; } 168 169 private: 170 // NnApi instance to use. Not owned by this object. 171 const NnApi* nnapi_; 172 int fd_ = 0; 173 size_t byte_size_ = 0; 174 uint8_t* data_ptr_ = nullptr; 175 ANeuralNetworksMemory* nn_memory_handle_ = nullptr; 176 }; 177 178 // LINT.IfChange 179 enum class NNAPIValidationFailureType : int { 180 // The operator is not supported by either NNAPI or the NNAPI Delegate. 181 kUnsupportedOperator = 0, 182 // The given operation or operands are not supported on the specified 183 // Android SDK version. The min supported version is specified in the 184 // validation failure message. 185 kUnsupportedAndroidVersion = 1, 186 // The version of the operator (value of TfLiteRegistration::version) 187 // for the given op is not supported. The max supported version 188 // is specified in the validation failure message. 189 // For more details on each operator version see 190 // the GetBuiltinOperatorVersion function in 191 // third_party/tensorflow/lite/tools/versioning/op_version.cc. 192 kUnsupportedOperatorVersion = 2, 193 // The given input operand type is not supported for the current combination 194 // of operator type and sdk version. 195 kUnsupportedInputType = 3, 196 // When using NN API version 1.0 or 1.1, the condition 197 // input_scale * filter_scale < output_scale 198 // must be true for quantized versions of the following ops: 199 // * CONV_2D 200 // * DEPTHWISE_CONV_2D 201 // * FULLY_CONNECTED (where filter actually stands for weights) 202 // The condition is relaxed and no longer required since version 1.2. 203 kNotRestrictedScaleCompliant = 4, 204 // The given output operand type is not supported for the current combination 205 // of operator type and sdk version. 206 kUnsupportedOutputType = 5, 207 // The size of the operand tensor is too large. 208 kUnsupportedOperandSize = 6, 209 // The value of one of the operands or of a combination of operands is 210 // not supported. Details are provided in the failure message. 211 kUnsupportedOperandValue = 7, 212 // The combination of float inputs and quantized weights or filters 213 // is not supported 214 kUnsupportedHybridOperator = 8, 215 // The quantization type (for example per-channel quantization) is not 216 // supported. 217 kUnsupportedQuantizationType = 9, 218 // The accelerated version of operation requires a specific operand to be 219 // specified. 220 kMissingRequiredOperand = 10, 221 // The rank of the operand is not supported. Details in the failure message. 222 kUnsupportedOperandRank = 11, 223 // The input tensor cannot be dynamically-sized. 224 kInputTensorShouldHaveConstantShape = 12, 225 // The operator has a different number of inputs of the one or ones that 226 // are supported by NNAPI. 227 kUnsupportedOperatorVariant = 13, 228 // The accelerated version of the operator cannot specify an activation 229 // function. 230 kNoActivationExpected = 14, 231 // Quantization scale and/or zero point are not in the supported value(s) 232 // for the accelerated operation. 233 kUnsupportedQuantizationParameters = 15, 234 }; 235 // LINT.ThenChange(nnapi_linter/linter.proto) 236 237 struct NNAPIValidationFailure { 238 NNAPIValidationFailureType type; 239 std::string message; 240 NNAPIValidationFailureNNAPIValidationFailure241 NNAPIValidationFailure(NNAPIValidationFailureType type, const char* message) 242 : type(type), message(message) {} 243 }; 244 245 // The kernel that represents the node sub set of TF Lite being run on NN API. 246 class NNAPIDelegateKernel { 247 public: NNAPIDelegateKernel(const NnApi * nnapi)248 explicit NNAPIDelegateKernel(const NnApi* nnapi) 249 : initialised_(false), 250 nnapi_(nnapi), 251 nn_model_(nullptr, NNFreeModel(nnapi_)), 252 nn_compilation_(nullptr, NNFreeCompilation(nnapi_)) {} NNAPIDelegateKernel()253 NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {} ~NNAPIDelegateKernel()254 ~NNAPIDelegateKernel() { 255 for (auto content : allocation_memory_mapping_) { 256 nnapi_->ANeuralNetworksMemory_free(content.second); 257 } 258 } 259 260 // Translate a node into its operands 261 // It assumes that the call to Validate for has been successful for 262 // the operation. 263 // In case of success it returns kTfLiteOk and stores in n_op_type the 264 // NNAPI Operation code. 265 // Returns kTfLiteError in case of failures during mapping. 266 static TfLiteStatus Map(TfLiteContext* context, int builtin_code, int version, 267 int android_sdk_version, 268 const NNAPIOpMappingArgs& mapping_args, 269 ANeuralNetworksOperationType* nn_op_type); 270 271 // Returns true if the node can be accelerated with NNAPI. 272 static bool Validate( 273 const TfLiteContext* context, int builtin_code, int version, 274 int android_sdk_version, const TfLiteNode* node, 275 bool is_accelerator_specified, 276 // Collects lists of failures collected during 277 // the validation of the possibility of accelerating 278 // the given node 279 std::vector<NNAPIValidationFailure>* map_failures = nullptr); 280 281 // Initialize the kernel (a NN model) and builds the NN Model. 282 // Any NNAPI Related error causing this method to fail will have the 283 // associated error number stored in nnapi_errno 284 TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params, 285 int* nnapi_errno); 286 287 // Creates the NNAPI Compilation for the NN model. It assumes that Init has 288 // been called and completed successfully. 289 // Any NNAPI Related error causing this method to fail will have the 290 // associated error number stored in nnapi_errno 291 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node, 292 int* nnapi_errno); 293 294 // Invoke the NN Model. Expects Init and Prepare to have been completed 295 // successfully. 296 // Any NNAPI Related error causing this method to fail will have the 297 // associated error number stored in nnapi_errno 298 TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node, 299 int* nnapi_errno); 300 301 // Returns the list of operations supported by the current NNAPI model as 302 // built in Prepare. Every operation is identified by the index as provided 303 // in the delegate parameters given to the delegate during the Init call. 304 // It expects the Init method has been called and completed successfully and 305 // returns kTfLiteError if not. Returns an error if any of the NNAPI 306 // operations fails or if the 307 // ANeuralNetworksModel_getSupportedOperationsForDevices function is not 308 // available in the NnApi object. 309 TfLiteStatus GetOperationsSupportedByTargetNnApiDevices( 310 TfLiteContext* context, std::vector<int>* supported_nodes, 311 int* nnapi_errno); 312 313 private: 314 // True if initialization has been completed successfully 315 bool initialised_; 316 // Access to NNApi. 317 const NnApi* nnapi_; 318 // ANN device handle. 319 std::vector<ANeuralNetworksDevice*> nnapi_devices_; 320 // Name of the nnapi device, empty if nnapi_devices_ is empty; 321 std::string device_name_; 322 // ANN API state. 323 std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_; 324 std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation> 325 nn_compilation_; 326 // Node indices that this delegate is responsible for. Indices here 327 // indexes into the nodes array in the TfLiteContext. 328 std::vector<int> nodes_; 329 // Track indices we use 330 OperandMapping operand_mapping_; 331 std::map<const MMAPAllocation*, ANeuralNetworksMemory*> 332 allocation_memory_mapping_; 333 // Track memory map 334 const std::vector<StatefulNnApiDelegate::MemoryRegistration>* 335 tensor_memory_map_; 336 std::vector<int> model_state_outputs_; 337 std::vector<int> model_state_tfl_inputs_; 338 // This is the equivalent of the pair model_state_outputs_, 339 // model_state_tfl_inputs_ for all tensors where we have to keep the output 340 // data available for TFLite model users 341 std::vector<std::tuple<int, int>> feedback_loops_; 342 343 std::unique_ptr<NNMemory> nn_input_memory_; 344 std::unique_ptr<NNMemory> nn_output_memory_; 345 346 std::vector<uint8_t> nn_compilation_cache_token_; 347 348 std::vector<int> nnapi_to_tflite_op_mapping_; 349 350 // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors 351 int target_sdk_version_ = 27; // kMinSdkVersionForNNAPI13 352 353 void AddDequantizeOperatorsWhereNeeded( 354 const TfLiteContext* context, int builtin_code, const TfLiteNode* node, 355 int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno); 356 357 TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno, 358 bool allow_dynamic_dimensions); 359 360 TfLiteStatus BuildGraph(TfLiteContext* context, 361 const StatefulNnApiDelegate::Options& options, 362 const TfLiteIntArray* input_tensors, 363 const TfLiteIntArray* output_tensors, 364 int* nnapi_errno); 365 }; 366 367 } // namespace nnapi 368 } // namespace delegate 369 } // namespace tflite 370 371 #endif // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_ 372