1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/core/subgraph.h"
17 #include "tensorflow/lite/arena_planner.h"
18 #include "tensorflow/lite/c/c_api_internal.h"
19 #include "tensorflow/lite/context_util.h"
20 #include "tensorflow/lite/graph_info.h"
21 #include "tensorflow/lite/nnapi_delegate.h"
22 #include "tensorflow/lite/schema/schema_generated.h"
23 
24 namespace tflite {
25 
26 namespace {
ReportOpError(TfLiteContext * context,const TfLiteNode & node,const TfLiteRegistration & registration,int node_index,const char * message)27 TfLiteStatus ReportOpError(TfLiteContext* context, const TfLiteNode& node,
28                            const TfLiteRegistration& registration,
29                            int node_index, const char* message) {
30   context->ReportError(
31       context, "Node number %d (%s) %s.\n", node_index,
32       registration.custom_name
33           ? registration.custom_name
34           : EnumNameBuiltinOperator(
35                 static_cast<BuiltinOperator>(registration.builtin_code)),
36       message);
37   return kTfLiteError;
38 }
39 
40 // Stub method which returns kTfLiteError when the function is forbidden.
41 // We're registrating this function to several different function to save
42 // compiled binary size. Please note the restrictions:
43 // * The type of first parameter have to be `TfLiteContext*`.
44 // * All paramteters must be trivailly destructible. (E.g. No C++ class)
ForbiddenContextFunction(TfLiteContext * context,...)45 TfLiteStatus ForbiddenContextFunction(TfLiteContext* context, ...) {
46   context->ReportError(context,
47                        "The function is forbidden if not calling in delegate.");
48   return kTfLiteError;
49 }
50 
51 // Set the ForbiddenContextFunction to a compatible function pointer.
52 template <typename FunctionType>
SetForbiddenContextFunction(FunctionType * func)53 void SetForbiddenContextFunction(FunctionType* func) {
54   *func = reinterpret_cast<FunctionType>(ForbiddenContextFunction);
55 }
56 
57 // Returns true if at least one tensor in the given list is kTfLiteDynamic.
58 template <typename TensorIntArray>
HasDynamicTensorImpl(const TfLiteContext & context,const TensorIntArray & int_array)59 bool HasDynamicTensorImpl(const TfLiteContext& context,
60                           const TensorIntArray& int_array) {
61   for (int i : int_array) {
62     const TfLiteTensor& tensor = context.tensors[i];
63     if (tensor.allocation_type == kTfLiteDynamic) {
64       return true;
65     }
66   }
67   return false;
68 }
69 
HasDynamicTensor(const TfLiteContext & context,const TfLiteIntArray * int_array)70 bool HasDynamicTensor(const TfLiteContext& context,
71                       const TfLiteIntArray* int_array) {
72   return HasDynamicTensorImpl(context, TfLiteIntArrayView{int_array});
73 }
74 
75 // Gets the legacy TfLiteQuantizationParams from the current TfLiteQuantization.
GetLegacyQuantization(const TfLiteQuantization & quantization)76 TfLiteQuantizationParams GetLegacyQuantization(
77     const TfLiteQuantization& quantization) {
78   TfLiteQuantizationParams legacy_quantization;
79   legacy_quantization.scale = 0;
80   legacy_quantization.zero_point = 0;
81 
82   // If the quantization type isn't affine, return the empty
83   // legacy_quantization.
84   if (quantization.type != kTfLiteAffineQuantization) {
85     return legacy_quantization;
86   }
87 
88   auto* affine_quantization =
89       reinterpret_cast<TfLiteAffineQuantization*>(quantization.params);
90   if (!affine_quantization || !affine_quantization->scale ||
91       !affine_quantization->zero_point ||
92       affine_quantization->scale->size != 1 ||
93       affine_quantization->zero_point->size != 1) {
94     return legacy_quantization;
95   }
96 
97   // We know its per-layer quantization now.
98   legacy_quantization.scale = affine_quantization->scale->data[0];
99   legacy_quantization.zero_point = affine_quantization->zero_point->data[0];
100   return legacy_quantization;
101 }
102 
103 }  // namespace
104 
105 // A trivial implementation of GraphInfo around the Interpreter.
106 // NOTE: this interpreter info represents the subset of the
107 // graph that is executed according to execution plan. Thus,
108 // the indices are execution plan indices rather than raw node
109 // indices.
110 class InterpreterInfo : public GraphInfo {
111  public:
InterpreterInfo(Subgraph * subgraph)112   explicit InterpreterInfo(Subgraph* subgraph) : subgraph_(subgraph) {}
113 
num_tensors() const114   size_t num_tensors() const override { return subgraph_->tensors().size(); }
tensor(size_t index)115   TfLiteTensor* tensor(size_t index) override {
116     return &subgraph_->tensors()[index];
117   }
num_nodes() const118   size_t num_nodes() const override {
119     return subgraph_->execution_plan().size();
120   }
node(size_t index) const121   const TfLiteNode& node(size_t index) const override {
122     int node_index = subgraph_->execution_plan()[index];
123     return subgraph_->nodes_and_registration()[node_index].first;
124   }
inputs() const125   const std::vector<int>& inputs() const override {
126     return subgraph_->inputs();
127   }
outputs() const128   const std::vector<int>& outputs() const override {
129     return subgraph_->outputs();
130   }
variables() const131   const std::vector<int>& variables() const override {
132     return subgraph_->variables();
133   }
134 
135  public:
136   Subgraph* subgraph_;
137 };
138 
Subgraph(ErrorReporter * error_reporter,TfLiteExternalContext ** external_contexts,std::vector<std::unique_ptr<Subgraph>> * subgraphs)139 Subgraph::Subgraph(ErrorReporter* error_reporter,
140                    TfLiteExternalContext** external_contexts,
141                    std::vector<std::unique_ptr<Subgraph>>* subgraphs)
142     : context_(&owned_context_),
143       error_reporter_(error_reporter),
144       next_execution_plan_index_to_prepare_(0),
145       external_contexts_(external_contexts),
146       subgraphs_(subgraphs) {
147   context_->impl_ = static_cast<void*>(this);
148   context_->ResizeTensor = ResizeTensor;
149   context_->ReportError = ReportErrorC;
150   context_->AddTensors = AddTensors;
151   context_->tensors = nullptr;
152   context_->tensors_size = 0;
153   context_->allow_fp32_relax_to_fp16 = false;
154   context_->recommended_num_threads = -1;
155   context_->GetExternalContext = GetExternalContext;
156   context_->SetExternalContext = SetExternalContext;
157   context_->profiler = nullptr;
158 
159   // Reserve some space for the tensors to avoid excessive resizing.
160   tensors_.reserve(kTensorsReservedCapacity);
161   nodes_and_registration().reserve(kTensorsReservedCapacity);
162   // Invalid to call these these except from TfLiteDelegate
163   SwitchToKernelContext();
164 }
165 
~Subgraph()166 Subgraph::~Subgraph() {
167   for (auto& node_and_reg : nodes_and_registration_) {
168     TfLiteNode& node = node_and_reg.first;
169     TfLiteIntArrayFree(node.inputs);
170     TfLiteIntArrayFree(node.outputs);
171     TfLiteIntArrayFree(node.temporaries);
172     if (node.builtin_data) free(node.builtin_data);
173     OpFree(node_and_reg.second, node.user_data);
174     node.builtin_data = nullptr;
175   }
176 
177   for (size_t i = 0; i < context_->tensors_size; i++) {
178     TfLiteTensor* tensor = &context_->tensors[i];
179     if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
180         tensor->delegate->FreeBufferHandle != nullptr) {
181       tensor->delegate->FreeBufferHandle(context_, tensor->delegate,
182                                          &tensor->buffer_handle);
183     }
184     TfLiteTensorFree(tensor);
185   }
186 }
187 
ReplaceNodeSubsetsWithDelegateKernels(TfLiteContext * context,TfLiteRegistration registration,const TfLiteIntArray * nodes_to_replace,TfLiteDelegate * delegate)188 TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
189     TfLiteContext* context, TfLiteRegistration registration,
190     const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
191   return static_cast<Subgraph*>(context->impl_)
192       ->ReplaceNodeSubsetsWithDelegateKernels(registration, nodes_to_replace,
193                                               delegate);
194 }
195 
196 namespace {
197 
198 // Copy a std::vector<int> to an existing TfLiteIntArray.
199 // This is a low-level data manipulation function, and it's caller's
200 // responsibility to ensure TfLiteIntArray has enough size.
CopyVectorToTfLiteIntArray(const std::vector<int> & vec,TfLiteIntArray * arr)201 void CopyVectorToTfLiteIntArray(const std::vector<int>& vec,
202                                 TfLiteIntArray* arr) {
203   arr->size = vec.size();
204   memcpy(arr->data, vec.data(), sizeof(int) * arr->size);
205 }
206 
207 // This function allocates a continuous memory space that contains a
208 // TfLiteDelegateParams followed by a several TfLiteIntArray.
209 // When calling `free` at TfLiteDelegateParams*, all the allocated space
210 // will be freed together.
211 //
212 // +-----------------------------------+
213 // | TfLiteDelegateParams              |
214 // | TfLiteDelegate* delegate;         |
215 // | TfLiteIntArray* nodes_to_replace; |--\
216 // | TfLiteIntArray* input_tensors;    |--+--\
217 // | TfLiteIntArray* output_tensors;   |--+--+--\
218 // +-----------------------------------+  |  |  |
219 // | TfLiteIntArray (variable size)    |<-/  |  |
220 // +-----------------------------------+     |  |
221 // | TfLiteIntArray (variable size)    |<----/  |
222 // +-----------------------------------+        |
223 // | TfLiteIntArray (variable size)    |<-------/
224 // +-----------------------------------+
CreateDelegateParams(TfLiteDelegate * delegate,const NodeSubset & node_subset)225 TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate,
226                                            const NodeSubset& node_subset) {
227   // Step 1: Calculate the allocation size.
228   int allocation_size = sizeof(TfLiteDelegateParams);
229 
230   int nodes_to_replace_size =
231       TfLiteIntArrayGetSizeInBytes(node_subset.nodes.size());
232   allocation_size += nodes_to_replace_size;
233 
234   int input_tensors_size =
235       TfLiteIntArrayGetSizeInBytes(node_subset.input_tensors.size());
236   allocation_size += input_tensors_size;
237 
238   int output_tensors_size =
239       TfLiteIntArrayGetSizeInBytes(node_subset.output_tensors.size());
240   allocation_size += output_tensors_size;
241 
242   // Step 2: Allocate the memory.
243   // Use `char*` for conveniently step through the allocated space by bytes.
244   char* allocation = reinterpret_cast<char*>(malloc(allocation_size));
245 
246   // Step 3: Fill all data structures structures.
247   TfLiteDelegateParams* params =
248       reinterpret_cast<TfLiteDelegateParams*>(allocation);
249   params->delegate = delegate;
250   allocation += sizeof(TfLiteDelegateParams);
251 
252   params->nodes_to_replace = reinterpret_cast<TfLiteIntArray*>(allocation);
253   CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);
254   allocation += nodes_to_replace_size;
255 
256   params->input_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
257   CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);
258   allocation += input_tensors_size;
259 
260   params->output_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
261   CopyVectorToTfLiteIntArray(node_subset.output_tensors,
262                              params->output_tensors);
263   allocation += output_tensors_size;
264 
265   return params;
266 }
267 
268 }  // namespace
269 
ReplaceNodeSubsetsWithDelegateKernels(TfLiteRegistration registration,const TfLiteIntArray * nodes_to_replace,TfLiteDelegate * delegate)270 TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
271     TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace,
272     TfLiteDelegate* delegate) {
273   // Annotate the registration as DELEGATE op.
274   registration.builtin_code = BuiltinOperator_DELEGATE;
275 
276   // Analyze the graph to find all independent node_subsets that are either
277   // fully not-this-delegate or this-delegate computation.
278   InterpreterInfo info(this);
279   std::vector<NodeSubset> node_subsets;
280   PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
281                                            &node_subsets);
282 
283   execution_plan_.clear();
284 
285   for (auto& node_subset : node_subsets) {
286     // Subsets calimed by the delegate should have a "macro" op created, the
287     // other node_subsets (kTfNonPartition) just have their nodes added back to
288     // the execution plan.
289     switch (node_subset.type) {
290       case NodeSubset::kTfNonPartition:
291         for (auto it = node_subset.nodes.begin(); it != node_subset.nodes.end();
292              ++it) {
293           execution_plan_.push_back(*it);
294         }
295         break;
296       case NodeSubset::kTfPartition: {
297         int node_index;
298 
299         TfLiteDelegateParams* params =
300             CreateDelegateParams(delegate, node_subset);
301         TF_LITE_ENSURE_STATUS(AddNodeWithParameters(
302             node_subset.input_tensors, node_subset.output_tensors, nullptr, 0,
303             params, &registration, &node_index));
304 
305         // Initialize the output tensors's delegate-related fields.
306         for (int tensor_index : node_subset.output_tensors) {
307           TfLiteTensor* tensor = &tensors_[tensor_index];
308           TF_LITE_ENSURE(context_, tensor->delegate == nullptr ||
309                                        tensor->delegate == delegate);
310           tensor->delegate = delegate;
311         }
312 
313         // Associate the node with the delegate.
314         TfLiteNode* node = &nodes_and_registration_[node_index].first;
315         node->delegate = delegate;
316       } break;
317       case NodeSubset::kTfUnexplored:
318         return kTfLiteError;
319         break;
320     }
321   }
322   return kTfLiteOk;
323 }
324 
GetExternalContext(TfLiteExternalContextType type)325 TfLiteExternalContext* Subgraph::GetExternalContext(
326     TfLiteExternalContextType type) {
327   if (type >= 0 && type < kTfLiteMaxExternalContexts) {
328     return external_contexts_[type];
329   }
330   return nullptr;
331 }
332 
GetExternalContext(struct TfLiteContext * context,TfLiteExternalContextType type)333 TfLiteExternalContext* Subgraph::GetExternalContext(
334     struct TfLiteContext* context, TfLiteExternalContextType type) {
335   return static_cast<Subgraph*>(context->impl_)->GetExternalContext(type);
336 }
337 
SetExternalContext(TfLiteExternalContextType type,TfLiteExternalContext * ctx)338 void Subgraph::SetExternalContext(TfLiteExternalContextType type,
339                                   TfLiteExternalContext* ctx) {
340   if (type >= 0 && type < kTfLiteMaxExternalContexts) {
341     external_contexts_[type] = ctx;
342   }
343 }
344 
SetExternalContext(struct TfLiteContext * context,TfLiteExternalContextType type,TfLiteExternalContext * ctx)345 void Subgraph::SetExternalContext(struct TfLiteContext* context,
346                                   TfLiteExternalContextType type,
347                                   TfLiteExternalContext* ctx) {
348   return static_cast<Subgraph*>(context->impl_)->SetExternalContext(type, ctx);
349 }
350 
351 // Gets an TfLiteIntArray* representing the execution plan. The interpreter owns
352 // this memory and it is only guaranteed to exist during the invocation of the
353 // delegate prepare.
GetExecutionPlan(TfLiteIntArray ** execution_plan)354 TfLiteStatus Subgraph::GetExecutionPlan(TfLiteIntArray** execution_plan) {
355   // TODO(aselle): Do not make a copy here
356   plan_cache_.reset(TfLiteIntArrayCreate(execution_plan_.size()));
357   *execution_plan = plan_cache_.get();
358   static_assert(sizeof(plan_cache_->data[0]) == sizeof(execution_plan_[0]),
359                 "TfLiteIntArray and execution_plan do not contain same type.");
360   std::memcpy(plan_cache_->data, execution_plan_.data(),
361               sizeof(plan_cache_->data[0]) * execution_plan_.size());
362   return kTfLiteOk;
363 }
364 
365 // WARNING: This is an experimental interface that is subject to change.
366 // Entry point for C node plugin API to get the execution plan
GetExecutionPlan(struct TfLiteContext * context,TfLiteIntArray ** execution_plan)367 TfLiteStatus Subgraph::GetExecutionPlan(struct TfLiteContext* context,
368                                         TfLiteIntArray** execution_plan) {
369   return static_cast<Subgraph*>(context->impl_)
370       ->GetExecutionPlan(execution_plan);
371 }
372 
SetInputs(std::vector<int> inputs)373 TfLiteStatus Subgraph::SetInputs(std::vector<int> inputs) {
374   TF_LITE_ENSURE_OK(&context_,
375                     CheckTensorIndices("inputs", inputs.data(), inputs.size()));
376   inputs_ = std::move(inputs);
377   return kTfLiteOk;
378 }
379 
SetOutputs(std::vector<int> outputs)380 TfLiteStatus Subgraph::SetOutputs(std::vector<int> outputs) {
381   TF_LITE_ENSURE_OK(
382       &context_, CheckTensorIndices("outputs", outputs.data(), outputs.size()));
383   outputs_ = std::move(outputs);
384   return kTfLiteOk;
385 }
386 
SetVariables(std::vector<int> variables)387 TfLiteStatus Subgraph::SetVariables(std::vector<int> variables) {
388   TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("variables", variables.data(),
389                                                   variables.size()));
390   variables_ = std::move(variables);
391   return kTfLiteOk;
392 }
393 
SetCancellationFunction(void * data,bool (* check_cancelled_func)(void *))394 void Subgraph::SetCancellationFunction(void* data,
395                                        bool (*check_cancelled_func)(void*)) {
396   cancellation_data_ = data;
397   check_cancelled_func_ = check_cancelled_func;
398 }
399 
ReserveNodes(int count)400 void Subgraph::ReserveNodes(int count) {
401   nodes_and_registration_.reserve(count);
402 }
403 
CheckTensorIndices(const char * label,const int * indices,int length)404 TfLiteStatus Subgraph::CheckTensorIndices(const char* label, const int* indices,
405                                           int length) {
406   // Making sure kOptionalTensor is not re-defined to something other than -1.
407   static_assert(kOptionalTensor == -1, "kOptionalTensor should be defined -1");
408 
409   for (int i = 0; i < length; i++) {
410     int index = indices[i];
411     // Continue if index == kOptionalTensor before additional comparisons below,
412     // size_t(-1) is always >= context_tensors_size.
413     if (index == kOptionalTensor) {
414       continue;
415     }
416     if (index < 0 || static_cast<size_t>(index) >= context_->tensors_size) {
417       ReportError(
418           "Invalid tensor index %d in %s. The subgraph has %d tensors\n", index,
419           label, context_->tensors_size);
420       consistent_ = false;
421       return kTfLiteError;
422     }
423   }
424   return kTfLiteOk;
425 }
426 
BytesRequired(TfLiteType type,const int * dims,size_t dims_size,size_t * bytes)427 TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims,
428                                      size_t dims_size, size_t* bytes) {
429   // TODO(aselle): Check for overflow here using overflow.h in TensorFlow
430   // MultiplyWithoutOverflow.
431   TF_LITE_ENSURE(context_, bytes != nullptr);
432   size_t count = 1;
433   for (int k = 0; k < dims_size; k++) count *= dims[k];
434   switch (type) {
435     case kTfLiteFloat32:
436       *bytes = sizeof(float) * count;
437       break;
438     case kTfLiteInt16:
439       *bytes = sizeof(int16_t) * count;
440       break;
441     case kTfLiteInt32:
442       *bytes = sizeof(int32_t) * count;
443       break;
444     case kTfLiteUInt8:
445       *bytes = sizeof(uint8_t) * count;
446       break;
447     case kTfLiteInt64:
448       *bytes = sizeof(int64_t) * count;
449       break;
450     case kTfLiteBool:
451       *bytes = sizeof(bool) * count;
452       break;
453     case kTfLiteComplex64:
454       *bytes = sizeof(std::complex<float>) * count;
455       break;
456     case kTfLiteInt8:
457       *bytes = sizeof(int8_t) * count;
458       break;
459     default:
460       ReportError(
461           "Only float32, int8, int16, int32, int64, uint8, bool, complex64 "
462           "supported currently.");
463       return kTfLiteError;
464   }
465   return kTfLiteOk;
466 }
467 
AllocateTensors()468 TfLiteStatus Subgraph::AllocateTensors() {
469   if (!consistent_) {
470     ReportError("AllocateTensors() called on inconsistent model.");
471     return kTfLiteError;
472   }
473 
474   // Explicit (re)allocation is necessary if nodes have been changed or tensors
475   // have been resized. For inputs marked as dynamic, we can't short-circuit the
476   // allocation as the client may have done the resize manually.
477   if (state_ != kStateUninvokable &&
478       !HasDynamicTensorImpl(*context_, inputs())) {
479     return kTfLiteOk;
480   }
481 
482   next_execution_plan_index_to_prepare_ = 0;
483   if (memory_planner_) {
484     TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
485   }
486 
487   TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
488 
489   state_ = kStateInvokable;
490 
491   // Reset the variable tensors to zero after (re)allocating the tensors.
492   // Developers shouldn't rely on the side effect of this function to reset
493   // variable tesnsors. They should call `ResetVariableTensors` directly
494   // instead.
495   ResetVariableTensors();
496 
497   return kTfLiteOk;
498 }
499 
500 // TODO(ycling): Support non-zero default values.
ResetVariableTensors()501 TfLiteStatus Subgraph::ResetVariableTensors() {
502   for (auto& tensor : tensors_) {
503     if (!tensor.is_variable) {
504       continue;
505     }
506 
507     // Variable tensors have to be `kTfLiteArenaRwPersistent`, and must be
508     // allocated after the initial `PrepareOpsAndTensors()` is called.
509     TF_LITE_ENSURE_EQ(context_, tensor.allocation_type,
510                       kTfLiteArenaRwPersistent);
511     TF_LITE_ENSURE(context_, tensor.data.raw != nullptr);
512 
513     memset(tensor.data.raw, 0, tensor.bytes);
514   }
515   return kTfLiteOk;
516 }
517 
AddNodeWithParameters(const std::vector<int> & inputs,const std::vector<int> & outputs,const char * init_data,size_t init_data_size,void * builtin_data,const TfLiteRegistration * registration,int * node_index)518 TfLiteStatus Subgraph::AddNodeWithParameters(
519     const std::vector<int>& inputs, const std::vector<int>& outputs,
520     const char* init_data, size_t init_data_size, void* builtin_data,
521     const TfLiteRegistration* registration, int* node_index) {
522   if (state_ == kStateInvokableAndImmutable) {
523     ReportError("AddNodeWithParameters is disallowed when graph is immutable.");
524     return kTfLiteError;
525   }
526   state_ = kStateUninvokable;
527 
528   std::unique_ptr<void, decltype(free)*> builtin_data_deleter(builtin_data,
529                                                               free);
530 
531   TF_LITE_ENSURE_OK(context_, CheckTensorIndices("node inputs", inputs.data(),
532                                                  inputs.size()));
533   TF_LITE_ENSURE_OK(
534       &context_,
535       CheckTensorIndices("node outputs", outputs.data(), outputs.size()));
536 
537   int new_node_index = nodes_and_registration_.size();
538   if (node_index) *node_index = new_node_index;
539   nodes_and_registration_.resize(nodes_and_registration_.size() + 1);
540   auto& node_and_reg = nodes_and_registration_.back();
541   TfLiteNode& node = node_and_reg.first;
542   if (node.inputs) TfLiteIntArrayFree(node.inputs);
543   if (node.outputs) TfLiteIntArrayFree(node.outputs);
544   if (node.temporaries) TfLiteIntArrayFree(node.temporaries);
545 
546   // NOTE, here we are not using move semantics yet, since our internal
547   // representation isn't std::vector, but in the future we would like to avoid
548   // copies, so we want the interface to take r-value references now.
549   node.inputs = ConvertVectorToTfLiteIntArray(inputs);
550   node.outputs = ConvertVectorToTfLiteIntArray(outputs);
551   node.temporaries = TfLiteIntArrayCreate(0);
552   if (init_data) {
553     node.user_data = OpInit(*registration, init_data, init_data_size);
554   } else {
555     node.user_data =
556         OpInit(*registration,
557                reinterpret_cast<const char*>(builtin_data_deleter.get()), 0);
558   }
559 
560   node.builtin_data = builtin_data_deleter.release();
561   // TODO(ycling): Filling `custom_initial_data` and `custom_initial_data_size`
562   // properly for nodes generated by ReplaceNodeSubsetsWithDelegateKernels.
563 
564   if (registration->builtin_code == BuiltinOperator_CUSTOM) {
565     // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer
566     // `Operator` table is passed in.
567     node.custom_initial_data = init_data;
568     node.custom_initial_data_size = init_data_size;
569   } else {
570     node.custom_initial_data = nullptr;
571     node.custom_initial_data_size = 0;
572   }
573 
574   node.delegate = nullptr;
575   node_and_reg.second = *registration;
576   execution_plan_.push_back(new_node_index);
577   return kTfLiteOk;
578 }
579 
ResizeInputTensor(int tensor_index,const std::vector<int> & dims)580 TfLiteStatus Subgraph::ResizeInputTensor(int tensor_index,
581                                          const std::vector<int>& dims) {
582   if (state_ == kStateInvokableAndImmutable) {
583     ReportError("ResizeInputTensor is disallowed when graph is immutable.");
584     return kTfLiteError;
585   }
586 
587   // TODO(aselle): All bounds checks can be implemented as one-sided bounds
588   // checks by casting to unsigned for efficiency. Profile before doing this.
589   TF_LITE_ENSURE(context_,
590                  tensor_index < context_->tensors_size && tensor_index >= 0);
591   TfLiteTensor* tensor = &context_->tensors[tensor_index];
592 
593   // Short-circuit the state change if the dimensions don't change, avoiding
594   // unnecessary (re)allocations.
595   //
596   // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
597   // the subgraph won't allocate memory for a dynamic tensor when its size
598   // is equal to the original tensor size.
599   if (tensor->data.raw != nullptr &&
600       EqualArrayAndTfLiteIntArray(tensor->dims, dims.size(), dims.data())) {
601     return kTfLiteOk;
602   }
603 
604   state_ = kStateUninvokable;
605   return ResizeTensorImpl(tensor, ConvertVectorToTfLiteIntArray(dims));
606 }
607 
PrepareOpsStartingAt(int first_execution_plan_index,int * last_execution_plan_index_prepared)608 TfLiteStatus Subgraph::PrepareOpsStartingAt(
609     int first_execution_plan_index, int* last_execution_plan_index_prepared) {
610   if (first_execution_plan_index == 0) {
611     has_dynamic_tensors_ = false;
612   }
613   for (int execution_plan_index = first_execution_plan_index;
614        execution_plan_index < execution_plan_.size(); execution_plan_index++) {
615     int node_index = execution_plan_[execution_plan_index];
616     TfLiteNode& node = nodes_and_registration_[node_index].first;
617     const TfLiteRegistration& registration =
618         nodes_and_registration_[node_index].second;
619     EnsureTensorsVectorCapacity();
620     if (OpPrepare(registration, &node) == kTfLiteError) {
621       return ReportOpError(context_, node, registration, node_index,
622                            "failed to prepare");
623     }
624 
625     *last_execution_plan_index_prepared = execution_plan_index;
626 
627     // Discontinue if the node has dynamic outputs. Note that we don't
628     // stop for dynamic temporary tensors since they won't affect the
629     // sizes of other tensors in the graph.
630     if (HasDynamicTensor(*context_, node.outputs)) {
631       has_dynamic_tensors_ = true;
632       return kTfLiteOk;
633     }
634   }
635   return kTfLiteOk;
636 }
637 
PrepareOpsAndTensors()638 TfLiteStatus Subgraph::PrepareOpsAndTensors() {
639   if (!memory_planner_) {
640     memory_planner_.reset(new ArenaPlanner(
641         context_, std::unique_ptr<GraphInfo>(new InterpreterInfo(this)),
642         /*preserve_inputs=*/true, /*preserve_intermediates*/ false));
643     memory_planner_->PlanAllocations();
644   }
645 
646   int last_exec_plan_index_prepared = 0;
647 
648   TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
649       next_execution_plan_index_to_prepare_, &last_exec_plan_index_prepared));
650   TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
651       next_execution_plan_index_to_prepare_, last_exec_plan_index_prepared));
652 
653   next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1;
654   return kTfLiteOk;
655 }
656 
Invoke()657 TfLiteStatus Subgraph::Invoke() {
658   if (!consistent_) {
659     ReportError("Invoke called on model that is not consistent.");
660     return kTfLiteError;
661   }
662 
663   TfLiteStatus status = kTfLiteOk;
664   if (state_ == kStateUninvokable) {
665     ReportError("Invoke called on model that is not ready.");
666     return kTfLiteError;
667   }
668 
669   if (nnapi_delegate_) {
670     if (next_execution_plan_index_to_prepare_ == execution_plan_.size()) {
671       TF_LITE_ENSURE_OK(context_, nnapi_delegate_->Invoke(this));
672       return kTfLiteOk;
673     } else {
674       // TODO(aselle): In the future, we would like this to be an
675       // automatic tflite CPU fallback.
676       ReportError(
677           "NNAPI was requested, but dependent sized tensors "
678           "being used.\n");
679       return kTfLiteError;
680     }
681   }
682 
683   // Invocations are always done in node order.
684   // Note that calling Invoke repeatedly will cause the original memory plan to
685   // be reused, unless either ResizeInputTensor() or AllocateTensors() has been
686   // called.
687   for (int execution_plan_index = 0;
688        execution_plan_index < execution_plan_.size(); execution_plan_index++) {
689     if (execution_plan_index == next_execution_plan_index_to_prepare_) {
690       TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
691       TF_LITE_ENSURE(context_, next_execution_plan_index_to_prepare_ >=
692                                    execution_plan_index);
693     }
694     int node_index = execution_plan_[execution_plan_index];
695     TfLiteNode& node = nodes_and_registration_[node_index].first;
696     const TfLiteRegistration& registration =
697         nodes_and_registration_[node_index].second;
698     SCOPED_OPERATOR_PROFILE(profiler_, node_index);
699 
700     // TODO(ycling): This is an extra loop through inputs to check if the data
701     // need to be copied from Delegate buffer to raw memory, which is often not
702     // needed. We may want to cache this in prepare to know if this needs to be
703     // done for a node or not.
704     for (int i = 0; i < node.inputs->size; ++i) {
705       int tensor_index = node.inputs->data[i];
706       if (tensor_index == kOptionalTensor) {
707         continue;
708       }
709       TfLiteTensor* tensor = &tensors_[tensor_index];
710       if (tensor->delegate && tensor->delegate != node.delegate &&
711           tensor->data_is_stale) {
712         TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index));
713       }
714     }
715 
716     if (check_cancelled_func_ != nullptr &&
717         check_cancelled_func_(cancellation_data_)) {
718       ReportError("Client requested cancel during Invoke()");
719       return kTfLiteError;
720     }
721 
722     EnsureTensorsVectorCapacity();
723     tensor_resized_since_op_invoke_ = false;
724     if (OpInvoke(registration, &node) == kTfLiteError) {
725       return ReportOpError(context_, node, registration, node_index,
726                            "failed to invoke");
727     }
728 
729     // Force execution prep for downstream ops if the latest op triggered the
730     // resize of a dynamic tensor.
731     if (tensor_resized_since_op_invoke_ &&
732         HasDynamicTensor(*context_, node.outputs)) {
733       next_execution_plan_index_to_prepare_ = execution_plan_index + 1;
734     }
735   }
736 
737   return status;
738 }
739 
ResizeTensor(TfLiteContext * context,TfLiteTensor * tensor,TfLiteIntArray * new_size)740 TfLiteStatus Subgraph::ResizeTensor(TfLiteContext* context,
741                                     TfLiteTensor* tensor,
742                                     TfLiteIntArray* new_size) {
743   // Note here that context->impl_ is recovering the this pointer for an
744   // instance of Interpreter to call into the member function ResizeTensorImpl
745   // (this function is static).
746   return static_cast<Subgraph*>(context->impl_)
747       ->ResizeTensorImpl(tensor, new_size);
748 }
749 
ReportErrorImpl(const char * format,va_list args)750 void Subgraph::ReportErrorImpl(const char* format, va_list args) {
751   error_reporter_->Report(format, args);
752 }
753 
ReportErrorC(TfLiteContext * context,const char * format,...)754 void Subgraph::ReportErrorC(TfLiteContext* context, const char* format, ...) {
755   va_list args;
756   va_start(args, format);
757   auto* f = static_cast<Subgraph*>(context->impl_);
758   // Note here that context->impl_ is recovering the this pointer for an
759   // instance of Subgraph to call into the member function ReportErrorImpl
760   // (this function is static).
761   f->ReportErrorImpl(format, args);
762   va_end(args);
763 }
764 
765 // Entry point for C node plugin API to report an error.
ReportError(const char * format,...)766 void Subgraph::ReportError(const char* format, ...) {
767   va_list args;
768   va_start(args, format);
769   auto* f = static_cast<Subgraph*>(context_->impl_);
770   // Note here that context->impl_ is recovering the this pointer for an
771   // instance of Subgraph to call into the member function ReportErrorImpl
772   // (this function is static).
773   f->ReportErrorImpl(format, args);
774   va_end(args);
775 }
776 
AddTensors(int tensors_to_add,int * first_new_tensor_index)777 TfLiteStatus Subgraph::AddTensors(int tensors_to_add,
778                                   int* first_new_tensor_index) {
779   const size_t base_index = tensors_.size();
780   if (first_new_tensor_index) *first_new_tensor_index = base_index;
781   tensors_.resize(tensors_.size() + tensors_to_add);
782   for (size_t i = base_index; i < tensors_.size(); i++) {
783     memset(&tensors_[i], 0, sizeof(tensors_[i]));
784     tensors_[i].buffer_handle = kTfLiteNullBufferHandle;
785   }
786   context_->tensors = tensors_.data();
787   context_->tensors_size = tensors_.size();
788   return kTfLiteOk;
789 }
790 
AddTensors(TfLiteContext * context,int tensors_to_add,int * first_new_tensor_index)791 TfLiteStatus Subgraph::AddTensors(TfLiteContext* context, int tensors_to_add,
792                                   int* first_new_tensor_index) {
793   // Note here that context->impl_ is recovering the this pointer for an
794   // instance of Interpreter to call into the member function AddTensors
795   // (this function is static).
796   return static_cast<Subgraph*>(context->impl_)
797       ->AddTensors(tensors_to_add, first_new_tensor_index);
798 }
799 
GetNodeAndRegistration(int node_index,TfLiteNode ** node,TfLiteRegistration ** registration)800 TfLiteStatus Subgraph::GetNodeAndRegistration(
801     int node_index, TfLiteNode** node, TfLiteRegistration** registration) {
802   TF_LITE_ENSURE(context_, node_index >= 0);
803   auto nodes_size = nodes_and_registration_.size();
804   TF_LITE_ENSURE(context_, static_cast<size_t>(node_index) < nodes_size);
805   TF_LITE_ENSURE(context_, node != nullptr && registration != nullptr);
806   auto& node_and_reg = nodes_and_registration_[node_index];
807   *node = &node_and_reg.first;
808   *registration = &node_and_reg.second;
809   return kTfLiteOk;
810 }
811 
GetNodeAndRegistration(struct TfLiteContext * context,int node_index,TfLiteNode ** node,TfLiteRegistration ** registration)812 TfLiteStatus Subgraph::GetNodeAndRegistration(
813     struct TfLiteContext* context, int node_index, TfLiteNode** node,
814     TfLiteRegistration** registration) {
815   return static_cast<Subgraph*>(context->impl_)
816       ->GetNodeAndRegistration(node_index, node, registration);
817 }
818 
SetTensorParametersReadOnly(int tensor_index,TfLiteType type,const char * name,const size_t rank,const int * dims,TfLiteQuantization quantization,const char * buffer,size_t bytes,const Allocation * allocation)819 TfLiteStatus Subgraph::SetTensorParametersReadOnly(
820     int tensor_index, TfLiteType type, const char* name, const size_t rank,
821     const int* dims, TfLiteQuantization quantization, const char* buffer,
822     size_t bytes, const Allocation* allocation) {
823   if (state_ == kStateInvokableAndImmutable) {
824     ReportError(
825         "SetTensorParametersReadOnly is disallowed when graph is immutable.");
826     return kTfLiteError;
827   }
828 
829   TF_LITE_ENSURE(context_,
830                  tensor_index < context_->tensors_size && tensor_index >= 0);
831   // For most tensors we know exactly how much memory is necessary so we can
832   // ensure the buffer is large enough. However, we need to skip string tensors
833   // because their sizes change with the contents of the individual strings.
834   if (type != kTfLiteString) {
835     size_t required_bytes;
836     TF_LITE_ENSURE_OK(context_,
837                       BytesRequired(type, dims, rank, &required_bytes));
838     TF_LITE_ENSURE_EQ(context_, required_bytes, bytes);
839   }
840 
841   TfLiteTensor& tensor = context_->tensors[tensor_index];
842   if (type == tensor.type &&
843       EqualArrayAndTfLiteIntArray(tensor.dims, rank, dims)) {
844     // Fast path which does not invalidate the invokable property.
845     TfLiteTensorDataFree(&tensor);
846     TfLiteQuantizationFree(&tensor.quantization);
847     tensor.data.raw = const_cast<char*>(buffer);
848     if (!tensor.dims) tensor.dims = ConvertArrayToTfLiteIntArray(rank, dims);
849     tensor.params = GetLegacyQuantization(quantization);
850     tensor.quantization = quantization;
851     tensor.allocation_type = kTfLiteMmapRo;
852     tensor.allocation = allocation;
853   } else {
854     state_ = kStateUninvokable;
855     TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims),
856                       GetLegacyQuantization(quantization),
857                       const_cast<char*>(buffer), bytes, kTfLiteMmapRo,
858                       allocation, false, &tensor);
859     // TODO(suharshs): Update TfLiteTensorReset to include the new quantization
860     // if there are other required callers.
861     tensor.quantization = quantization;
862   }
863   return kTfLiteOk;
864 }
865 
866 // Set description of inputs/outputs/data/fptrs for node `node_index`.
867 // This variant assumes an external buffer has been allocated of size
868 // bytes. The lifetime of buffer must be ensured to be greater or equal
869 // to Interpreter.
SetTensorParametersReadWrite(int tensor_index,TfLiteType type,const char * name,const size_t rank,const int * dims,TfLiteQuantization quantization,bool is_variable)870 TfLiteStatus Subgraph::SetTensorParametersReadWrite(
871     int tensor_index, TfLiteType type, const char* name, const size_t rank,
872     const int* dims, TfLiteQuantization quantization, bool is_variable) {
873   if (state_ == kStateInvokableAndImmutable) {
874     ReportError(
875         "SetTensorParametersReadWrite is disallowed when graph is immutable.");
876     return kTfLiteError;
877   }
878   TF_LITE_ENSURE(context_,
879                  tensor_index < context_->tensors_size && tensor_index >= 0);
880   size_t required_bytes = 0;
881   if (type != kTfLiteString) {
882     // These types will be allocated in our arena so we need to record how
883     // many bytes we will need based on the dimensions. String tensors are
884     // allocated dynamically and we can't know ahead of time how much space
885     // they will require.
886     TF_LITE_ENSURE_OK(context_,
887                       BytesRequired(type, dims, rank, &required_bytes));
888   }
889 
890   TfLiteAllocationType allocation_type = kTfLiteArenaRw;
891   if (type == kTfLiteString) {
892     if (is_variable) {
893       // We don't have a real use case for string variable tensor.
894       ReportError("String variable tensor isn't supported.");
895       return kTfLiteError;
896     }
897     allocation_type = kTfLiteDynamic;
898   } else if (is_variable) {
899     allocation_type = kTfLiteArenaRwPersistent;
900   }
901 
902   TfLiteTensor& tensor = context_->tensors[tensor_index];
903   TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims),
904                     GetLegacyQuantization(quantization),
905                     /*buffer=*/nullptr, required_bytes, allocation_type,
906                     nullptr, is_variable, &tensor);
907   // TODO(suharshs): Update TfLiteTensorReset to include the new quantization
908   // if there are other required callers.
909   tensor.quantization = quantization;
910   return kTfLiteOk;
911 }
912 
SetExecutionPlan(const std::vector<int> & new_plan)913 TfLiteStatus Subgraph::SetExecutionPlan(const std::vector<int>& new_plan) {
914   for (int node_index : new_plan) {
915     TF_LITE_ENSURE(context_, node_index >= 0 &&
916                                  node_index < nodes_and_registration_.size());
917   }
918   execution_plan_ = new_plan;
919   return kTfLiteOk;
920 }
921 
ResizeTensorImpl(TfLiteTensor * tensor,TfLiteIntArray * new_size)922 TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor,
923                                         TfLiteIntArray* new_size) {
924   // Note that in theory we could resize kTfLiteArenaRwPersistent tensors too.
925   if (tensor->allocation_type == kTfLiteArenaRw ||
926       tensor->allocation_type == kTfLiteDynamic ||
927       tensor->allocation_type == kTfLiteArenaRwPersistent) {
928     tensor_resized_since_op_invoke_ |=
929         TfLiteIntArrayEqual(tensor->dims, new_size) == 0;
930     if (tensor->type != kTfLiteString) {
931       size_t bytesRequired;
932       TfLiteStatus status = BytesRequired(tensor->type, new_size->data,
933                                           new_size->size, &bytesRequired);
934       if (status != kTfLiteOk) {
935         TfLiteIntArrayFree(new_size);
936         return kTfLiteError;
937       }
938 
939       // Realloc space for kTfLiteDynamic tensors.
940       TfLiteTensorRealloc(bytesRequired, tensor);
941       tensor->bytes = bytesRequired;
942     }
943     if (tensor->dims) TfLiteIntArrayFree(tensor->dims);
944     tensor->dims = new_size;
945 
946     if (tensor->allocation_type != kTfLiteDynamic) {
947       tensor->data.raw = nullptr;
948     }
949   } else {
950     // kTfLiteMmapRo tensors are stored in the flatbuffer and are therefore
951     // of fixed size.
952     TfLiteIntArrayFree(new_size);
953     ReportError("Attempting to resize a fixed-size tensor.");
954     return kTfLiteError;
955   }
956   return kTfLiteOk;
957 }
958 
UseNNAPI(bool enable)959 void Subgraph::UseNNAPI(bool enable) {
960   // TODO(aselle): This is a workaround for finding if NNAPI exists.
961   // We also need to make sure getLibraryHandle() is renamed to be NNAPI
962   // prefixed.
963   if (!NNAPIDelegate::IsSupported()) enable = false;
964   if (!enable) {
965     nnapi_delegate_.reset();
966   } else if (!nnapi_delegate_) {
967     nnapi_delegate_.reset(new NNAPIDelegate);
968   }
969 }
970 
SwitchToDelegateContext()971 void Subgraph::SwitchToDelegateContext() {
972   context_->GetNodeAndRegistration = GetNodeAndRegistration;
973   context_->ReplaceNodeSubsetsWithDelegateKernels =
974       ReplaceNodeSubsetsWithDelegateKernels;
975   context_->GetExecutionPlan = GetExecutionPlan;
976 }
977 
SwitchToKernelContext()978 void Subgraph::SwitchToKernelContext() {
979   context_->GetNodeAndRegistration = [](struct TfLiteContext* context,
980                                         int node_index, TfLiteNode** node,
981                                         TfLiteRegistration** registration) {
982     return ForbiddenContextFunction(context);
983   };
984   context_->ReplaceNodeSubsetsWithDelegateKernels =
985       [](TfLiteContext* context, TfLiteRegistration registration,
986          const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
987         return ForbiddenContextFunction(context);
988       };
989   context_->GetExecutionPlan = [](struct TfLiteContext* context,
990                                   TfLiteIntArray**) {
991     return ForbiddenContextFunction(context);
992   };
993 }
994 
ModifyGraphWithDelegate(TfLiteDelegate * delegate)995 TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
996   if (state_ == kStateInvokableAndImmutable) {
997     ReportError(
998         "ModifyGraphWithDelegate is disallowed when graph is immutable.");
999     return kTfLiteError;
1000   }
1001 
1002   if (!(delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors)) {
1003     int last_execution_plan_index_prepared;
1004     TF_LITE_ENSURE_OK(&context_, PrepareOpsStartingAt(
1005                                      0, &last_execution_plan_index_prepared));
1006     if (has_dynamic_tensors_) {
1007       ReportError(
1008           "Attempting to use a delegate that only supports static-sized "
1009           "tensors with a graph that has dynamic-sized tensors.");
1010       return kTfLiteError;
1011     }
1012   }
1013 
1014   const bool was_invokable_before_delegate = state_ == kStateInvokable;
1015 
1016   // TODO(aselle): Consider if it is worth storing pointers to delegates.
1017   // Setup additional context interface.
1018   SwitchToDelegateContext();
1019 
1020   TfLiteStatus status = delegate->Prepare(context_, delegate);
1021 
1022   // Remove additional context info.
1023   SwitchToKernelContext();
1024 
1025   TF_LITE_ENSURE_OK(context_, status);
1026 
1027   // If the memory planner has already been created, we need to execute
1028   // planning again to account for the updated graph topology.
1029   if (memory_planner_) {
1030     state_ = kStateUninvokable;
1031     TF_LITE_ENSURE_OK(context_, memory_planner_->PlanAllocations());
1032   }
1033 
1034   if (!(delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors)) {
1035     // Reset the state to force tensor/op reallocation.
1036     state_ = kStateUninvokable;
1037     TF_LITE_ENSURE_OK(context_, AllocateTensors());
1038     TF_LITE_ENSURE_EQ(context_, state_, kStateInvokable);
1039     // After using a delegate which doesn't support dynamic tensors, make the
1040     // entire graph immutable.
1041     state_ = kStateInvokableAndImmutable;
1042   } else if (was_invokable_before_delegate) {
1043     // If the graph was invokable prior to delegate application, flush
1044     // allocation now to leave it in a consistent state.
1045     TF_LITE_ENSURE_OK(context_, AllocateTensors());
1046     TF_LITE_ENSURE_EQ(context_, state_, kStateInvokable);
1047   }
1048 
1049   return status;
1050 }
1051 
1052 }  // namespace tflite
1053