/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/arena_planner.h"

#include <stddef.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/simple_memory_arena.h"

namespace tflite {
namespace {

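// Sentinel value stored in alloc_node_/dealloc_node_ for tensors that have
// not been assigned an allocation or deallocation node yet.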
constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

}  // namespace

ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                           std::unique_ptr<GraphInfo> graph_info,
                           bool preserve_inputs, bool preserve_intermediates,
                           int tensor_alignment)
    : context_(context),
      graph_info_(std::move(graph_info)),
      arena_(kDefaultArenaAlignment),
      persistent_arena_(kDefaultArenaAlignment),
      preserve_inputs_(preserve_inputs),
      preserve_intermediates_(preserve_intermediates),
      tensor_alignment_(tensor_alignment) {}

ArenaPlanner::~ArenaPlanner() {}
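
// A minimal usage sketch, for orientation only: the real call sites live in
// the interpreter/subgraph code, not in this file. `last_node_index` stands
// for the caller's last execution node, and 64 is just an illustrative
// alignment value.
//
//   ArenaPlanner planner(context, std::move(graph_info),
//                        /*preserve_inputs=*/true,
//                        /*preserve_intermediates=*/false,
//                        /*tensor_alignment=*/64);
//   TF_LITE_ENSURE_STATUS(planner.PlanAllocations());
//   TF_LITE_ENSURE_STATUS(planner.ExecuteAllocations(0, last_node_index));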

std::intptr_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
  if (type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.BasePointer();
  }
  if (type == kTfLiteArenaRw) {
    return arena_.BasePointer();
  }
  return 0;
}

TfLiteStatus ArenaPlanner::ResetAllocations() {
  TF_LITE_ENSURE_STATUS(arena_.ClearPlan());
  TF_LITE_ENSURE_STATUS(persistent_arena_.ClearPlan());
  allocs_.clear();
  allocs_.resize(graph_info_->num_tensors());
  return kTfLiteOk;
}

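// Resets the allocation record of every kTfLiteArenaRw tensor that is first
// used after `node`, so that the plan can be recomputed from that node on.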
TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
  for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
    if (allocs_[i].first_node > node && allocs_[i].size > 0) {
      TfLiteTensor& tensor = *graph_info_->tensor(i);
      if (tensor.allocation_type == kTfLiteArenaRw) {
        TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[i]));
        allocs_[i].reset();
        tensor.data.raw = nullptr;
      }
    }
  }

  return kTfLiteOk;
}

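// Computes, via simple reference counting, the node at which each tensor is
// first needed (alloc_node_) and the node after which it is no longer read
// (dealloc_node_), subject to the preserve_inputs_/preserve_intermediates_
// flags. No arena offsets are assigned here; that happens later in
// CalculateAllocations().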
TfLiteStatus ArenaPlanner::PlanAllocations() {
  // Invalidate any existing data.
  TF_LITE_ENSURE_STATUS(ResetAllocations());
  // Mark every tensor as not yet assigned to an allocation/deallocation node.
  alloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);

  // Keeps track of references to each tensor.
  std::vector<int> refcounts(graph_info_->num_tensors(), 0);

  auto allocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] != kNodeNotAssigned) {
      // Tensor has already been allocated.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    alloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] == kNodeNotAssigned) {
      // There is no need to deallocate a tensor that was never allocated.
      // This can happen with constant tensors.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    dealloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  // We must make sure the output tensors are never overwritten. We do that by
  // artificially adding one to their ref-counts so they are never selected
  // for deallocation.
  for (int tensor_index : graph_info_->outputs()) {
    refcounts[tensor_index]++;
  }

  // Variable tensors must also never be overwritten and need to stay alive
  // for the entire inference.
  for (int tensor_index : graph_info_->variables()) {
    // Increase the reference count of each variable tensor by one, so it is
    // never deallocated.
    refcounts[tensor_index]++;
    // `variables` is a subgraph-level list and should never contain
    // kTfLiteOptionalTensor.
    TF_LITE_ENSURE(context_, tensor_index != kTfLiteOptionalTensor);
    // Variable tensors should be allocated at the very beginning.
    TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
  }

  // Queue all graph inputs for allocation. If preserve_inputs_ is true, make
  // sure they are never overwritten.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kTfLiteOptionalTensor) {
      if (preserve_inputs_) {
        refcounts[tensor_index]++;
      }
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    }
  }

  // Count references to node input tensors.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kTfLiteOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }

  // Go through the graph in execution order.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);

    // First queue output tensors for allocation.
    TfLiteIntArray* node_outputs = node.outputs;
    for (int j = 0; j < node_outputs->size; ++j) {
      int tensor_index = node_outputs->data[j];
      TF_LITE_ENSURE_STATUS(allocate(i, tensor_index));
    }

    // Then update the ref-counts of the node's inputs, and if necessary queue
    // them for deallocation.
    if (!preserve_intermediates_) {
      TfLiteIntArray* node_inputs = node.inputs;
      for (int j = 0; j < node_inputs->size; ++j) {
        int tensor_index = node_inputs->data[j];
        if (tensor_index != kTfLiteOptionalTensor) {
          refcounts[tensor_index]--;
          if (refcounts[tensor_index] == 0) {
            TF_LITE_ENSURE_STATUS(deallocate(i, tensor_index));
          }
        }
      }
    }
  }

  // Note that graph outputs will never be scheduled for deallocation. We
  // could do that here for completeness, but it won't have any effect.
  return kTfLiteOk;
}

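// (Re)computes arena offsets for the tensors first allocated within
// [first_node, last_node], commits both arenas, and then refreshes the data
// pointers of every tensor.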
TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
  // Grow the size of `allocs_` if necessary. This allows allocating temporary
  // tensors in an op's `prepare` function.
  TF_LITE_ENSURE(context_, graph_info_->num_tensors() >= allocs_.size());
  alloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  allocs_.resize(graph_info_->num_tensors());
  // Set allocation and deallocation for temporary tensors.
  for (size_t i = first_node; i <= static_cast<size_t>(last_node) &&
                              i < graph_info_->num_execution_nodes();
       ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int j = 0; j < node_temporaries->size; ++j) {
      int tensor_index = node_temporaries->data[j];
      alloc_node_[tensor_index] = i;
      if (!preserve_intermediates_) {
        dealloc_node_[tensor_index] = i;
      }
    }
  }

  TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
  TF_LITE_ENSURE_STATUS(Commit());

  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    // TODO(ahentz): we could do this only for the tensors that were modified
    // in CalculateAllocations(), instead of redoing it for tensors that
    // already had proper pointers. However we must be very careful, because
    // SimpleMemoryArena::Commit() could move the base pointer.
    TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
  }

  return kTfLiteOk;
}

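// Drops the non-persistent arena's backing buffer and clears the data
// pointers of all kTfLiteArenaRw tensors; AcquireNonPersistentMemory() below
// re-commits the arena and restores those pointers.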
TfLiteStatus ArenaPlanner::ReleaseNonPersistentMemory() {
  // Clear non-persistent arena's buffer.
  TF_LITE_ENSURE_STATUS(arena_.ReleaseBuffer());
  // Set data pointers for all non-persistent tensors to nullptr.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      tensor.data.raw = nullptr;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::AcquireNonPersistentMemory() {
  // First commit arena_ to allocate underlying buffer.
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  // Resolve allocations for all tensors not on the persistent arena.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
    }
  }
  return kTfLiteOk;
}

bool ArenaPlanner::HasNonPersistentMemory() {
  return arena_.GetBufferSize() != 0;
}

TfLiteStatus ArenaPlanner::Commit() {
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
  return kTfLiteOk;
}

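// Returns the indices of all tensors whose allocation node lies within
// [first_node, last_node], ordered for offset calculation: tensors that live
// for the whole inference come first (sorted by index), followed by the
// remaining tensors in non-increasing size order, with ties broken by
// allocation time. This implements a greedy-by-size ordering for the
// subsequent offset calculation.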
std::vector<int32_t> ArenaPlanner::CreateTensorAllocationVector(int first_node,
                                                                int last_node) {
  auto tensor_compare = [this](int idx1, int idx2) {
    // Tensors that stay alive for the entire inference are allocated at the
    // beginning of the memory slice. Their relative order does not actually
    // matter, so they are simply sorted by index.
    if (this->alloc_node_[idx1] == 0 &&
        this->dealloc_node_[idx1] == kNodeNotAssigned) {
      if (this->alloc_node_[idx2] == 0 &&
          this->dealloc_node_[idx2] == kNodeNotAssigned) {
        return idx1 < idx2;
      }
      return true;
    }
    if (this->alloc_node_[idx2] == 0 &&
        this->dealloc_node_[idx2] == kNodeNotAssigned) {
      return false;
    }

    // All other tensors are sorted in non-increasing order of their size.
    auto size1 = this->graph_info_->tensor(idx1)->bytes;
    auto size2 = this->graph_info_->tensor(idx2)->bytes;
    if (size1 != size2) {
      return size1 > size2;
    }
    // Tensors with equal size are sorted in order of their allocation time.
    return this->alloc_node_[idx1] < this->alloc_node_[idx2];
  };

  std::vector<int32_t> tensor_order;
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    if (alloc_node_[i] >= first_node && alloc_node_[i] <= last_node) {
      tensor_order.push_back(i);
    }
  }
  // Sort the indices into the order in which allocation offsets will be
  // calculated.
  std::sort(tensor_order.begin(), tensor_order.end(), tensor_compare);

  return tensor_order;
}

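// Assigns an arena offset to every tensor in the computed order: existing
// kTfLiteArenaRw allocations in the range are released first, then each
// tensor is (re)allocated in its arena. Persistent tensors are allocated at
// most once and never deallocated.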
TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
  // Indices of tensors in the order in which their allocation offsets will be
  // calculated.
  const std::vector<int32_t> tensor_order =
      CreateTensorAllocationVector(first_node, last_node);

  // Deallocate if the tensor was already allocated.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw &&
        allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
    }
  }

  // Allocate each tensor in the computed order.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(
          arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
                          tensor_index, alloc_node_[tensor_index],
                          dealloc_node_[tensor_index], &allocs_[tensor_index]));
    }
    // Check allocs_[].size to prevent reallocation of persistent tensors.
    if (tensor.allocation_type == kTfLiteArenaRwPersistent &&
        allocs_[tensor_index].size == 0) {
      TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
          context_, tensor_alignment_, tensor.bytes, tensor_index,
          /*first_node=*/alloc_node_[tensor_index],
          /*last_node=*/std::numeric_limits<int32_t>::max(),
          &allocs_[tensor_index]));
    }
  }
  return kTfLiteOk;
}

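// Points tensor.data.raw at the offset computed for the tensor, using the
// arena that matches its allocation type.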
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Skip resolution if the size of the tensor is zero, leaving the data
    // pointer as nullptr.
    if (allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                                &tensor.data.raw));
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
        context_, allocs_[tensor_index], &tensor.data.raw));
  }
  return kTfLiteOk;
}

}  // namespace tflite