/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "tensorflow/lite/toco/allocate_transient_arrays.h"
#include "tensorflow/lite/toco/model.h"
#include "tensorflow/lite/toco/model_flags.pb.h"
#include "tensorflow/lite/toco/tooling_util.h"
#include "tensorflow/core/platform/logging.h"

namespace toco {
namespace {

// The life span of an array.
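// For example, an array first addressed by the op at index 2 and last
// addressed by the op at index 5 has first_op == 2 and last_op == 5: it is
// allocated just before op 2 executes and deallocated right after op 5
// executes. Persistent arrays are instead allocated for the whole run.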
struct ArrayLifespan {
  // If true, the array is persistent state (as in an RNN). In that case,
  // its allocation is permanent and the first_op, last_op members are
  // unused. (The term 'transient' is a misnomer; think in terms of
  // 'workspace' instead.)
  bool persistent = false;
  // Index of the first op addressing that array. The array must be allocated
  // just before executing this op.
  std::size_t first_op = 0;
  // Index of the last op addressing that array. We want to deallocate the
  // array immediately after executing this op.
  std::size_t last_op = 0;
};

bool StartsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
  return !lifespan.persistent && lifespan.first_op == op_index;
}

bool EndsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
  return !lifespan.persistent && lifespan.last_op == op_index;
}

// Helper function for ComputeArrayLifespans: updates one ArrayLifespan for
// one array for one op.
void UpdateArrayLifespan(
    const string& array_name, std::size_t op_index,
    std::unordered_map<string, ArrayLifespan>* array_lifespans) {
  if (array_lifespans->count(array_name)) {
    auto& lifespan = array_lifespans->at(array_name);
    if (!lifespan.persistent) {
      lifespan.first_op = std::min(lifespan.first_op, op_index);
      lifespan.last_op = std::max(lifespan.last_op, op_index);
    }
  } else {
    ArrayLifespan lifespan;
    lifespan.first_op = op_index;
    lifespan.last_op = op_index;
    (*array_lifespans)[array_name] = lifespan;
  }
}

// Computes the ArrayLifespan for each array.
void ComputeArrayLifespans(
    const Model& model,
    std::unordered_map<string, ArrayLifespan>* array_lifespans) {
  CHECK(array_lifespans->empty());
  for (const auto& rnn_state : model.flags.rnn_states()) {
    ArrayLifespan lifespan;
    lifespan.persistent = true;
    (*array_lifespans)[rnn_state.state_array()] = lifespan;
  }
  for (std::size_t op_index = 0; op_index < model.operators.size();
       op_index++) {
    const auto& op = model.operators[op_index];
    for (const auto& input : op->inputs) {
      UpdateArrayLifespan(input, op_index, array_lifespans);
    }
    for (const auto& output : op->outputs) {
      UpdateArrayLifespan(output, op_index, array_lifespans);
    }
  }
}

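// Equality of Allocs is defined by the start offset alone; the CHECK verifies
// that two allocations sharing a start offset also share an end offset, i.e.
// that they really are the same allocation.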
inline bool operator==(const Alloc& a, const Alloc& b) {
  CHECK(a.start != b.start || a.end == b.end);
  return a.start == b.start;
}

// Helper that keeps track of the total allocation size and of the currently
// live allocations, and contains the core allocation routine.
class Allocator {
 public:
  Allocator() : total_size_(0) {}

  // Core allocation routine.
  void Allocate(std::size_t size, Alloc* result) {
    if (size == 0) {
      // Zero-sized arrays get a dummy alloc of (0, 0) that does not need to
      // be kept in the books (no need to insert it into live_allocs_).
      // Note: zero-sized arrays shouldn't exist, but handling that case here
      // lets such pathological cases get a cleaner error message later
      // instead of generating spurious allocator failures.
      result->start = 0;
      result->end = 0;
      return;
    }
    // Naive algorithm: pick the first gap between live allocations that is
    // wide enough for the new array.
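    // For example, with live allocations occupying [0, 64) and [128, 256), a
    // request of size 48 is placed in the gap at [64, 112), while a request
    // of size 80 fits in no gap and is placed at [256, 336), growing
    // total_size_ if needed.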
    std::size_t pos = 0;
    for (const auto& a : live_allocs_) {
      if (a.start >= pos + size) {
        result->start = pos;
        result->end = pos + size;
        live_allocs_.insert(*result);
        return;
      }
      pos = a.end;
    }
    // No sufficiently wide gap was found before an existing live allocation,
    // so we allocate the new array at the end of the allocation space.
    // We may then have to grow total_size_.
    total_size_ = std::max(total_size_, pos + size);
    result->start = pos;
    result->end = pos + size;
    live_allocs_.insert(*result);
  }

  void Deallocate(const Alloc& a) {
    // Special-case dummy allocs for zero-sized arrays.
    if (a.start == 0 && a.end == 0) {
      // Nothing needs to be done, these aren't kept in the books.
      return;
    }
    auto iter = std::lower_bound(live_allocs_.begin(), live_allocs_.end(), a);
    CHECK(iter != live_allocs_.end());
    CHECK(*iter == a);
    live_allocs_.erase(iter);
  }

  std::size_t total_size() const { return total_size_; }

 private:
  std::size_t total_size_;
  std::set<Alloc> live_allocs_;
};

// Returns the required transient allocation size (in bytes) for a given array,
// or 0 if it's not a transient array.
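// For example, with a transient_data_alignment of 64 bytes, a float32 array
// of shape [1, 5, 5, 3] has a raw size of 75 * 4 = 300 bytes, which is
// rounded up to 320 bytes.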
std::size_t TransientArraySize(const Model& model, const string& array_name,
                               std::size_t transient_data_alignment) {
  if (!IsAllocatableTransientArray(model, array_name)) {
    return 0;
  }
  const auto& array = &model.GetArray(array_name);
  CHECK(array->has_shape())
      << "Array '" << array_name << "' doesn't have a shape";
  if (array->data_type == ArrayDataType::kNone) {
    // Catch a typical issue at the moment with RNN states.
    for (const auto& rnn_state : model.flags.rnn_states()) {
      if (rnn_state.state_array() == array_name) {
        LOG(FATAL)
            << "An RNN state array, " << array_name << ", still does not "
            << "have a known data type after all graph transformations have "
            << "run.";
      }
    }
    LOG(FATAL) << "An array, " << array_name << ", still does not "
               << "have a known data type after all graph transformations have "
               << "run.";
  }
  const std::size_t elem_size = ElementSize(array->data_type);
  const std::size_t raw_size =
      elem_size * RequiredBufferSizeForShape(array->shape());
  const std::size_t rounded_size =
      RoundUpToNextMultipleOf(raw_size, transient_data_alignment);
  return rounded_size;
}

// Allocates an array: call this for every array just before the first
// op where it is used.
void AllocateTransientArray(const Model& model, const string& array_name,
                            Allocator* allocator,
                            std::size_t transient_data_alignment) {
  if (!IsAllocatableTransientArray(model, array_name)) {
    return;
  }
  const std::size_t size =
      TransientArraySize(model, array_name, transient_data_alignment);
  const auto& array = &model.GetArray(array_name);
  CHECK(!array->alloc);
  allocator->Allocate(size, &array->GetOrCreateAlloc());
}

// Deallocates an array: call this for every array just after the last
// op where it is used.
void DeallocateTransientArray(const Model& model, const string& array_name,
                              Allocator* allocator) {
  if (!IsAllocatableTransientArray(model, array_name)) {
    return;
  }
  const auto& array = &model.GetArray(array_name);
  CHECK(!!array->alloc);
  allocator->Deallocate(*array->alloc);
}

void PushBackIfNotFound(const string& s, std::vector<string>* v) {
  if (std::find(v->begin(), v->end(), s) == v->end()) {
    v->push_back(s);
  }
}

}  // namespace

void AllocateTransientArrays(Model* model,
                             std::size_t transient_data_alignment) {
  // Precompute the lifespans for all arrays.
  std::unordered_map<string, ArrayLifespan> array_lifespans;
  ComputeArrayLifespans(*model, &array_lifespans);

  // In case of variable batch, our convention will be to compute the
  // allocations for batch==1, then let the inference code multiply all
  // the offsets by the actual runtime batch size. Conveniently,
  // the variable_batch and batch flags are mutually exclusive, and the default
  // value of batch is 1, so we have nothing special to do here. Let us
  // just guard this assumption with a CHECK:
  bool batchless_input_shapes = true;
  for (const auto& input_array : model->flags.input_arrays()) {
    if (!input_array.has_shape() || input_array.shape().dims().empty() ||
        input_array.shape().dims(0) != 1) {
      batchless_input_shapes = false;
      break;
    }
  }
  CHECK(!model->flags.variable_batch() || batchless_input_shapes);

  Allocator allocator;

  // Construct a sorted map of array names, so that other layout engines can
  // match exactly.
  std::map<string, const Array*> ordered_arrays_map;
  for (const auto& pair : model->GetArrayMap()) {
    ordered_arrays_map[pair.first] = pair.second.get();
  }

  // Allocate persistent arrays (like RNN states). For them, 'transient'
  // is a misnomer; read 'workspace' instead.
  for (const auto& array_pair : ordered_arrays_map) {
    const string& array_name = array_pair.first;
    auto it = array_lifespans.find(array_name);
    if (it != array_lifespans.end() && it->second.persistent) {
      AllocateTransientArray(*model, array_name, &allocator,
                             transient_data_alignment);
    }
  }

  for (std::size_t op_index = 0; op_index < model->operators.size();
       op_index++) {
    const auto& op = model->operators[op_index];
    // Allocate those arrays whose lifespan starts exactly here.
    std::vector<string> arrays_to_allocate;
    for (const auto& input : op->inputs) {
      if (StartsAt(array_lifespans[input], op_index)) {
        PushBackIfNotFound(input, &arrays_to_allocate);
      }
    }
    for (const auto& output : op->outputs) {
      if (StartsAt(array_lifespans[output], op_index)) {
        PushBackIfNotFound(output, &arrays_to_allocate);
      }
    }
    for (const string& array : arrays_to_allocate) {
      AllocateTransientArray(*model, array, &allocator,
                             transient_data_alignment);
    }

    // Deallocate those arrays whose lifespan ends exactly here.
    std::vector<string> arrays_to_deallocate;
    for (const auto& input : op->inputs) {
      if (EndsAt(array_lifespans[input], op_index)) {
        PushBackIfNotFound(input, &arrays_to_deallocate);
      }
    }
    for (const auto& output : op->outputs) {
      if (EndsAt(array_lifespans[output], op_index)) {
        PushBackIfNotFound(output, &arrays_to_deallocate);
      }
    }
    for (const string& array : arrays_to_deallocate) {
      DeallocateTransientArray(*model, array, &allocator);
    }
  }

  // Out of curiosity (this is not used in the actual allocation process),
  // evaluate the optimal total allocated size.
  // First, compute the size of persistent arrays.
  std::size_t optimal_transient_alloc_size = 0;
  std::size_t persistent_alloc_size = 0;
  for (const auto& array_pair : ordered_arrays_map) {
    const string& array_name = array_pair.first;
    auto it = array_lifespans.find(array_name);
    if (it != array_lifespans.end() && it->second.persistent) {
      persistent_alloc_size +=
          TransientArraySize(*model, array_name, transient_data_alignment);
    }
  }
  for (const auto& op : model->operators) {
    // For each operator, compute the sum of the sizes of the arrays that must
    // be live during the execution of this operator, plus the size of
    // persistent arrays that must be live at all times.
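    // For example, if persistent arrays total 1024 bytes and the
    // non-persistent inputs and outputs of this operator total 4096 bytes,
    // then this operator requires 5120 bytes to be live simultaneously; the
    // optimal size is the maximum of these per-operator requirements.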
    std::vector<string> non_persistent_edges;
    for (const auto& input : op->inputs) {
      if (!array_lifespans[input].persistent) {
        PushBackIfNotFound(input, &non_persistent_edges);
      }
    }
    for (const auto& output : op->outputs) {
      if (!array_lifespans[output].persistent) {
        PushBackIfNotFound(output, &non_persistent_edges);
      }
    }
    std::size_t size = persistent_alloc_size;
    for (const string& edge : non_persistent_edges) {
      size += TransientArraySize(*model, edge, transient_data_alignment);
    }
    // The optimal total size is the maximum of all operator-specific sizes.
    optimal_transient_alloc_size = std::max(optimal_transient_alloc_size, size);
  }

  model->transient_data_size = allocator.total_size();
  model->transient_data_alignment = transient_data_alignment;
  CHECK_GE(model->transient_data_size, optimal_transient_alloc_size);
  LOG(INFO) << "Total transient array allocated size: "
            << model->transient_data_size << " bytes, "
            << "theoretical optimal value: " << optimal_transient_alloc_size
            << " bytes.";
  CheckInvariants(*model);
}
}  // namespace toco