/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/interpreter.h"

#include <stddef.h>
#include <stdlib.h>

#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/stderr_reporter.h"
#include "tensorflow/lite/util.h"

// TODO(b/139446230): Move to portable platform header.
#if defined(__ANDROID__)
#define TFLITE_IS_MOBILE_PLATFORM
#endif  // defined(__ANDROID__)

#if defined(__APPLE__)
#include "TargetConditionals.h"
#if TARGET_IPHONE_SIMULATOR
#define TFLITE_IS_MOBILE_PLATFORM
#elif TARGET_OS_IPHONE
#define TFLITE_IS_MOBILE_PLATFORM
#endif
#endif  // defined(__APPLE__)

// TODO(b/132087118): move static_assert to c_api_internal when compiled with
// C++.
static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t),
              "Float 16 type must be 16 bits.");

namespace tflite {

namespace {

// Gets the current TfLiteQuantization from the legacy TfLiteQuantizationParams.
TfLiteQuantization GetQuantizationFromLegacy(
    const TfLiteQuantizationParams& legacy_quantization) {
  TfLiteQuantization quantization;
  quantization.type = kTfLiteAffineQuantization;
  auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
      malloc(sizeof(TfLiteAffineQuantization)));
  affine_quantization->scale = TfLiteFloatArrayCreate(1);
  affine_quantization->zero_point = TfLiteIntArrayCreate(1);
  affine_quantization->scale->data[0] = legacy_quantization.scale;
  affine_quantization->zero_point->data[0] = legacy_quantization.zero_point;
  quantization.params = affine_quantization;

  return quantization;
}
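// Illustrative sketch (comments only, not executed here): the conversion above
// turns legacy per-tensor parameters into a single-element affine
// quantization. `legacy` below is a hypothetical caller-supplied value.
//
//   TfLiteQuantizationParams legacy;
//   legacy.scale = 0.5f;
//   legacy.zero_point = 128;
//   TfLiteQuantization q = GetQuantizationFromLegacy(legacy);
//   // q.type == kTfLiteAffineQuantization; q.params owns heap-allocated
//   // single-element scale {0.5f} and zero_point {128} arrays.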

// TODO(b/153131797): We have temporarily set 'delegate_status' to 0 in the
// following macro because delegate-specific error codes are not retrievable
// at the moment; we will add them later.
#define TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(runtime_event, a) \
  do {                                                                      \
    TfLiteStatus status = (a);                                              \
    runtime_event.set_runtime_status(/*delegate_status=*/0,                 \
                                     static_cast<int64_t>(status));         \
    TF_LITE_ENSURE_STATUS(status);                                          \
  } while (0)
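// The macro is used inside Interpreter::Invoke() below, for example:
//
//   TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
//       scoped_runtime_event, primary_subgraph().Invoke());
//
// i.e. the wrapped call's status is recorded on the scoped runtime event
// before the usual TF_LITE_ENSURE_STATUS early-return semantics apply.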

}  // namespace

Interpreter::Interpreter(ErrorReporter* error_reporter)
    : error_reporter_(error_reporter ? error_reporter
                                     : DefaultErrorReporter()) {
  // TODO(b/128420794): Include the TFLite runtime version in the log.
  // Prod logging is useful for mobile platforms where scraping console logs is
  // critical for debugging.
#if defined(TFLITE_IS_MOBILE_PLATFORM)
  TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#else
  TFLITE_LOG_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#endif

  // There's always at least one subgraph, which is the primary subgraph.
  AddSubgraphs(1);
  context_ = primary_subgraph().context();

  // External contexts start out unset; clients may install their own later
  // via SetExternalContext().
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    external_contexts_[i] = nullptr;
  }

  // This operation is cheap because we allocate the CPU context resources
  // (i.e. threads) lazily.
  own_external_cpu_backend_context_.reset(new ExternalCpuBackendContext());
  external_contexts_[kTfLiteCpuBackendContext] =
      own_external_cpu_backend_context_.get();
}
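// Construction sketch: most clients go through tflite::InterpreterBuilder
// (declared elsewhere) rather than this constructor, but direct construction
// with a custom reporter also works; `reporter` below is illustrative only.
//
//   tflite::StderrReporter reporter;
//   tflite::Interpreter interpreter(&reporter);
//   // Passing nullptr falls back to DefaultErrorReporter(), per the
//   // initializer above.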

Interpreter::~Interpreter() {
  // The owned external CPU backend context will go out of scope with this
  // interpreter. If we have an external backend context that is not
  // owned, we need to clear the cache for other interpreters that may
  // use the context.
  if (external_contexts_[kTfLiteCpuBackendContext] &&
      (external_contexts_[kTfLiteCpuBackendContext] !=
       own_external_cpu_backend_context_.get())) {
    ExternalCpuBackendContext* external_context =
        static_cast<ExternalCpuBackendContext*>(
            external_contexts_[kTfLiteCpuBackendContext]);
    TfLiteInternalBackendContext* internal_context =
        external_context->internal_backend_context();
    if (internal_context) {
      // This call may have negative performance impacts on the next inference
      // for any interpreter using this context. The cache will be refreshed
      // by the next inference.
      internal_context->ClearCaches();
    }
  }
}

void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // We have an internally owned external context of kTfLiteCpuBackendContext.
  // If it's overwritten here, we will release the resources of the internally
  // owned external context.
  // Note: the 'max thread count' info associated with the overwritten context
  // will be lost here, and such info is now determined by the new context,
  // thus affecting how much parallelism a TFLite op would have.
  if (kTfLiteCpuBackendContext == type &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get()) {
    own_external_cpu_backend_context_.reset();
  }

  // This essentially changes the "external_contexts_[type]".
  primary_subgraph().SetExternalContext(type, ctx);
}
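// A sharing sketch, assuming two already-built interpreters (the names
// `interpreter_a`, `interpreter_b`, and `shared_ctx` are hypothetical): one
// ExternalCpuBackendContext may back several interpreters, which is the
// not-owned case the destructor above has to clean up after.
//
//   auto shared_ctx = std::make_unique<ExternalCpuBackendContext>();
//   interpreter_a->SetExternalContext(kTfLiteCpuBackendContext,
//                                     shared_ctx.get());
//   interpreter_b->SetExternalContext(kTfLiteCpuBackendContext,
//                                     shared_ctx.get());
//   // shared_ctx must outlive both interpreters.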

TfLiteStatus Interpreter::SetCustomAllocationForTensor(
    int tensor_index, const TfLiteCustomAllocation& allocation) {
  return primary_subgraph().SetCustomAllocationForTensor(tensor_index,
                                                         allocation);
}

TfLiteStatus Interpreter::SetInputs(std::vector<int> inputs) {
  return primary_subgraph().SetInputs(std::move(inputs));
}

TfLiteStatus Interpreter::SetOutputs(std::vector<int> outputs) {
  return primary_subgraph().SetOutputs(std::move(outputs));
}

TfLiteStatus Interpreter::SetVariables(std::vector<int> variables) {
  return primary_subgraph().SetVariables(std::move(variables));
}

TfLiteStatus Interpreter::AllocateTensors() {
  // Apply the default delegates that TFLite enables at this point; deferring
  // them until now allows user-level delegates to be applied first.
  if (!lazy_delegate_providers_.empty()) {
    // We only apply lazy delegate providers once.
    std::vector<TfLiteDelegatePtr> delegate_providers;
    delegate_providers.swap(lazy_delegate_providers_);

    TFLITE_LOG(TFLITE_LOG_INFO,
               "Applying %zu TensorFlow Lite delegate(s) lazily.",
               delegate_providers.size());
    // At the moment, the XNNPACK delegate is the only one that might be
    // applied by default, in which case execution will fall back to the
    // default implementation if the XNNPACK delegate fails to be applied.
    // Therefore, we ignore the return status here and let it fall through
    // the rest of the code.
    for (size_t i = 0; i < delegate_providers.size(); ++i) {
      auto status = ModifyGraphWithDelegate(std::move(delegate_providers[i]));
      switch (status) {
        case kTfLiteOk:
          TFLITE_LOG(TFLITE_LOG_INFO,
                     "Successfully applied the default TensorFlow Lite "
                     "delegate indexed at %zu.",
                     i);
          break;
        case kTfLiteError:
          TF_LITE_REPORT_ERROR(error_reporter_,
                               "Failed to apply the default TensorFlow Lite "
                               "delegate indexed at %zu.",
                               i);
          return kTfLiteError;
        case kTfLiteDelegateError:
          TF_LITE_REPORT_ERROR(
              error_reporter_,
              "Error in applying the default TensorFlow Lite delegate indexed "
              "at %zu, and all previously applied delegates are reverted.",
              i);
          break;
        case kTfLiteApplicationError:
          TF_LITE_REPORT_ERROR(error_reporter_,
                               "Ignoring failed application of the default "
                               "TensorFlow Lite delegate indexed at %zu.",
                               i);
          break;
        default:
          TF_LITE_REPORT_ERROR(error_reporter_,
                               "Unknown status (%d) after applying the default "
                               "TensorFlow Lite delegate indexed at %zu.",
                               status, i);
          return kTfLiteError;
      }
    }
  }

  return primary_subgraph().AllocateTensors();
}
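// A hedged end-to-end sketch of where AllocateTensors() fits. FlatBufferModel,
// BuiltinOpResolver, and InterpreterBuilder live in other headers, and the
// model path is illustrative:
//
//   auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
//   tflite::ops::builtin::BuiltinOpResolver resolver;
//   std::unique_ptr<tflite::Interpreter> interpreter;
//   tflite::InterpreterBuilder(*model, resolver)(&interpreter);
//   // Any user delegates would be applied here, before AllocateTensors(),
//   // so that the lazily-applied defaults above come after them.
//   interpreter->AllocateTensors();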

void Interpreter::ReserveNodes(int count) {
  primary_subgraph().ReserveNodes(count);
}

void Interpreter::AddSubgraphs(int subgraphs_to_add,
                               int* first_new_subgraph_index) {
  const size_t base_index = subgraphs_.size();
  if (first_new_subgraph_index) *first_new_subgraph_index = base_index;

  subgraphs_.reserve(base_index + subgraphs_to_add);
  for (int i = 0; i < subgraphs_to_add; ++i) {
    Subgraph* subgraph = new Subgraph(error_reporter_, external_contexts_,
                                      &subgraphs_, &resources_);
    subgraphs_.emplace_back(subgraph);
  }
}

TfLiteStatus Interpreter::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const char* init_data, size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  return primary_subgraph().AddNodeWithParameters(
      inputs, outputs, {}, init_data, init_data_size, builtin_data,
      registration, node_index);
}

TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
                                            const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensor(tensor_index, dims);
}

TfLiteStatus Interpreter::ResizeInputTensorStrict(
    int tensor_index, const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensorStrict(tensor_index, dims);
}

TfLiteStatus Interpreter::ReleaseNonPersistentMemory() {
  // TODO(b/138790287): We could do this for all subgraphs whose tensors have
  // been allocated. However, AllocateTensors() relies on Control Flow ops to
  // allocate tensors on 'children' subgraphs. Revisit this if required.
  return primary_subgraph().ReleaseNonPersistentMemory();
}

TfLiteStatus Interpreter::Invoke() {
  ScopedRuntimeInstrumentationProfile scoped_runtime_event(installed_profiler_,
                                                           "invoke");
  TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
      scoped_runtime_event, primary_subgraph().Invoke());

  if (!allow_buffer_handle_output_) {
    for (int tensor_index : outputs()) {
      TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
          scoped_runtime_event,
          primary_subgraph().EnsureTensorDataIsReadable(tensor_index));
    }
  }

  return kTfLiteOk;
}
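// A minimal single-inference sketch following AllocateTensors(); the float
// tensor types and the `input_data` buffer are assumptions about the model,
// not guarantees made by this file:
//
//   float* input = interpreter->typed_input_tensor<float>(0);
//   std::copy(input_data.begin(), input_data.end(), input);
//   if (interpreter->Invoke() != kTfLiteOk) { /* handle failure */ }
//   const float* output = interpreter->typed_output_tensor<float>(0);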

TfLiteStatus Interpreter::AddTensors(int tensors_to_add,
                                     int* first_new_tensor_index) {
  return primary_subgraph().AddTensors(tensors_to_add, first_new_tensor_index);
}

TfLiteStatus Interpreter::ResetVariableTensors() {
  return primary_subgraph().ResetVariableTensors();
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    const char* buffer, size_t bytes, const Allocation* allocation) {
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, dims.size(), dims.data(), quantization, buffer,
      bytes, allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    bool is_variable) {
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, dims.size(), dims.data(), quantization,
      is_variable);
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, const char* buffer,
    size_t bytes, const Allocation* allocation) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, rank, dims, new_quantization, buffer, bytes,
      allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, bool is_variable,
    const size_t rank_dims_signature, const int* dims_signature) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, rank, dims, new_quantization, is_variable,
      rank_dims_signature, dims_signature);
}

TfLiteStatus Interpreter::SetExecutionPlan(const std::vector<int>& new_plan) {
  return primary_subgraph().SetExecutionPlan(new_plan);
}

TfLiteStatus Interpreter::SetNumThreads(int num_threads) {
  if (num_threads < -1) {
    context_->ReportError(context_,
                          "num_threads should be >=0 or just -1 to let TFLite "
                          "runtime set the value.");
    return kTfLiteError;
  }

  for (auto& subgraph : subgraphs_) {
    subgraph->context()->recommended_num_threads = num_threads;
  }

  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    auto* c = external_contexts_[i];
    if (c && c->Refresh) {
      c->Refresh(context_);
    }
  }
  return kTfLiteOk;
}
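// Usage sketch (`interpreter` is hypothetical); per the check above, only
// values >= -1 are accepted:
//
//   interpreter->SetNumThreads(4);   // request 4 threads for CPU kernels
//   interpreter->SetNumThreads(-1);  // or let the runtime choose a value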

void Interpreter::SetAllowFp16PrecisionForFp32(bool allow) {
  for (auto& subgraph : subgraphs_) {
    subgraph->context()->allow_fp32_relax_to_fp16 = allow;
  }
}

// TODO(b/121264966): Subgraphs added after cancellation is set will not get
// the cancellation function added to their context.
void Interpreter::SetCancellationFunction(void* data,
                                          bool (*check_cancelled_func)(void*)) {
  for (auto& subgraph : subgraphs_) {
    subgraph->SetCancellationFunction(data, check_cancelled_func);
  }
}
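// A cancellation sketch assuming a caller-owned flag (`cancelled` and
// `interpreter` are illustrative); the captureless lambda converts to the
// required bool(*)(void*) pointer:
//
//   static std::atomic<bool> cancelled(false);
//   interpreter->SetCancellationFunction(&cancelled, [](void* data) {
//     return static_cast<std::atomic<bool>*>(data)->load();
//   });
//   // Setting `cancelled` to true from another thread interrupts an
//   // in-flight Invoke().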

bool Interpreter::IsCancelled() { return primary_subgraph().IsCancelled(); }

TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
  TfLiteStatus status = kTfLiteOk;
  for (auto& subgraph : subgraphs_) {
    if (IsValidationSubgraph(subgraph->GetName().c_str())) {
      continue;
    }
    status = subgraph->ModifyGraphWithDelegate(delegate);
    if (status != kTfLiteOk) {
      break;
    }
  }
  // Delegate-specific errors can be recovered from by restoring Interpreter to
  // its original state.
  if (status == kTfLiteDelegateError) {
    TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
  }
  return status;
}
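// A caller-side sketch of the recovery contract above (`delegate` and
// `interpreter` are hypothetical):
//
//   TfLiteStatus s = interpreter->ModifyGraphWithDelegate(delegate);
//   if (s == kTfLiteDelegateError) {
//     // Delegation was rolled back via RemoveAllDelegates(); CPU execution
//     // still works, so the caller may continue without the delegate.
//   } else if (s != kTfLiteOk) {
//     // Other failures are not rolled back automatically.
//   }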

TfLiteStatus Interpreter::RemoveAllDelegates() {
  for (auto& subgraph : subgraphs_) {
    TF_LITE_ENSURE_STATUS(subgraph->RemoveAllDelegates());
  }
  return kTfLiteOk;
}

bool Interpreter::HasDelegates() { return primary_subgraph().HasDelegates(); }

TfLiteStatus Interpreter::SetBufferHandle(int tensor_index,
                                          TfLiteBufferHandle buffer_handle,
                                          TfLiteDelegate* delegate) {
  TF_LITE_ENSURE(context_, tensor_index < tensors_size());
  TfLiteTensor* tensor = primary_subgraph().tensor(tensor_index);

  TF_LITE_ENSURE(context_,
                 tensor->delegate == nullptr || tensor->delegate == delegate);
  tensor->delegate = delegate;
  if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
    TF_LITE_ENSURE(context_, tensor->delegate->FreeBufferHandle != nullptr);
    tensor->delegate->FreeBufferHandle(context_, tensor->delegate,
                                       &tensor->buffer_handle);
  }
  tensor->buffer_handle = buffer_handle;

  return kTfLiteOk;
}

TfLiteStatus Interpreter::GetBufferHandle(int tensor_index,
                                          TfLiteBufferHandle* buffer_handle,
                                          TfLiteDelegate** delegate) {
  TF_LITE_ENSURE(context_, tensor_index < tensors_size());
  TfLiteTensor* tensor = primary_subgraph().tensor(tensor_index);

  *delegate = tensor->delegate;
  *buffer_handle = tensor->buffer_handle;

  return kTfLiteOk;
}

void Interpreter::SetProfiler(Profiler* profiler) {
  // Release resources occupied by owned_profiler_, which is replaced by the
  // caller-owned profiler.
  owned_profiler_.reset(nullptr);
  installed_profiler_ = profiler;
  SetSubgraphProfiler();
}

void Interpreter::SetProfiler(std::unique_ptr<Profiler> profiler) {
  owned_profiler_ = std::move(profiler);
  installed_profiler_ = owned_profiler_.get();
  SetSubgraphProfiler();
}

void Interpreter::SetSubgraphProfiler() {
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    subgraphs_[subgraph_index]->SetProfiler(installed_profiler_,
                                            subgraph_index);
  }
}

Profiler* Interpreter::GetProfiler() {
  return primary_subgraph().GetProfiler();
}

}  // namespace tflite