1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include <algorithm>
16 #include <memory>
17 #include <set>
18 #include <string>
19 #include <unordered_map>
20 #include <utility>
21 #include <vector>
22
23 #include "tensorflow/lite/toco/allocate_transient_arrays.h"
24 #include "tensorflow/lite/toco/model.h"
25 #include "tensorflow/lite/toco/model_flags.pb.h"
26 #include "tensorflow/lite/toco/tooling_util.h"
27 #include "tensorflow/core/platform/logging.h"
28
29 namespace toco {
30 namespace {
31
32 // The life span of an array.
33 struct ArrayLifespan {
34 // If true, the array is persistent state (as in a RNN). In that case,
35 // its allocation is permanent and the first_op, last_op members are
36 // unused. (The term 'transient' is a misnomer and we should think in
37 // terms of 'workspace' instead).
38 bool persistent = false;
39 // Index of the first op addressing that array. The array must be allocated
40 // just before executing this op.
41 std::size_t first_op = 0;
42 // Index of the last op addressing that array. We want to deallocate the array
43 // immediately after executing this op.
44 std::size_t last_op = 0;
45 };
46
StartsAt(const ArrayLifespan & lifespan,std::size_t op_index)47 bool StartsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
48 return !lifespan.persistent && lifespan.first_op == op_index;
49 }
50
EndsAt(const ArrayLifespan & lifespan,std::size_t op_index)51 bool EndsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
52 return !lifespan.persistent && lifespan.last_op == op_index;
53 }
54
55 // Helper function for ComputeArrayLifespans: updates one ArrayLifespan for
56 // one array for one op.
UpdateArrayLifespan(const string & array_name,std::size_t op_index,std::unordered_map<string,ArrayLifespan> * array_lifespans)57 void UpdateArrayLifespan(
58 const string& array_name, std::size_t op_index,
59 std::unordered_map<string, ArrayLifespan>* array_lifespans) {
60 if (array_lifespans->count(array_name)) {
61 auto& lifespan = array_lifespans->at(array_name);
62 if (!lifespan.persistent) {
63 lifespan.first_op = std::min(lifespan.first_op, op_index);
64 lifespan.last_op = std::max(lifespan.last_op, op_index);
65 }
66 } else {
67 ArrayLifespan lifespan;
68 lifespan.first_op = op_index;
69 lifespan.last_op = op_index;
70 (*array_lifespans)[array_name] = lifespan;
71 }
72 }
73
74 // Computes the ArrayLifespan for each array.
ComputeArrayLifespans(const Model & model,std::unordered_map<string,ArrayLifespan> * array_lifespans)75 void ComputeArrayLifespans(
76 const Model& model,
77 std::unordered_map<string, ArrayLifespan>* array_lifespans) {
78 CHECK(array_lifespans->empty());
79 for (const auto& rnn_state : model.flags.rnn_states()) {
80 ArrayLifespan lifespan;
81 lifespan.persistent = true;
82 (*array_lifespans)[rnn_state.state_array()] = lifespan;
83 }
84 for (std::size_t op_index = 0; op_index < model.operators.size();
85 op_index++) {
86 const auto& op = model.operators[op_index];
87 for (const auto& input : op->inputs) {
88 UpdateArrayLifespan(input, op_index, array_lifespans);
89 }
90 for (const auto& output : op->outputs) {
91 UpdateArrayLifespan(output, op_index, array_lifespans);
92 }
93 }
94 }
95
operator ==(const Alloc & a,const Alloc & b)96 inline bool operator==(const Alloc& a, const Alloc& b) {
97 CHECK(a.start != b.start || a.end == b.end);
98 return a.start == b.start;
99 }
100
101 // Helper to keep track of total allocation size and of currently live
102 // allocations, and containing the core allocation routine.
103 class Allocator {
104 public:
Allocator()105 Allocator() : total_size_(0) {}
106
107 // Core allocation routine.
Allocate(std::size_t size,Alloc * result)108 void Allocate(std::size_t size, Alloc* result) {
109 if (size == 0) {
110 // zero-sized arrays get a dummy alloc of (0, 0) that does not
111 // need to be kept in the books (no need to insert that into
112 // live_allocs_).
113 // Note: zero-sized arrays shouldn't exist, but handling that case
114 // here allows such pathological cases to get a cleaner error message
115 // later instead of generating spurious allocator failures.
116 result->start = 0;
117 result->end = 0;
118 return;
119 }
120 // Naive algorithm: pick the first gap between live allocations,
121 // that is wide enough for the new array.
122 std::size_t pos = 0;
123 for (const auto& a : live_allocs_) {
124 if (a.start >= pos + size) {
125 result->start = pos;
126 result->end = pos + size;
127 live_allocs_.insert(*result);
128 return;
129 }
130 pos = a.end;
131 }
132 // No sufficiently wide gap was found before an existing live allocation,
133 // so we allocate the new array at the end of the allocation space.
134 // We may then have to grow total_size_.
135 total_size_ = std::max(total_size_, pos + size);
136 result->start = pos;
137 result->end = pos + size;
138 live_allocs_.insert(*result);
139 }
140
Deallocate(const Alloc & a)141 void Deallocate(const Alloc& a) {
142 // Special-case dummy allocs for zero-sized arrays.
143 if (a.start == 0 && a.end == 0) {
144 // Nothing needs to be done, these aren't kept in the books.
145 return;
146 }
147 auto iter = std::lower_bound(live_allocs_.begin(), live_allocs_.end(), a);
148 CHECK(iter != live_allocs_.end());
149 CHECK(*iter == a);
150 live_allocs_.erase(iter);
151 }
152
total_size() const153 std::size_t total_size() const { return total_size_; }
154
155 private:
156 std::size_t total_size_;
157 std::set<Alloc> live_allocs_;
158 };
159
160 // Returns the required transient allocation size (in bytes) for a given array,
161 // or 0 if it's not a transient array.
TransientArraySize(const Model & model,const string & array_name,std::size_t transient_data_alignment)162 std::size_t TransientArraySize(const Model& model, const string& array_name,
163 std::size_t transient_data_alignment) {
164 if (!IsAllocatableTransientArray(model, array_name)) {
165 return 0;
166 }
167 const auto& array = &model.GetArray(array_name);
168 CHECK(array->has_shape())
169 << "Array '" << array_name << "' doesn't have a shape";
170 if (array->data_type == ArrayDataType::kNone) {
171 // Catch a typical issue at the moment with RNN states
172 for (const auto& rnn_state : model.flags.rnn_states()) {
173 if (rnn_state.state_array() == array_name) {
174 LOG(FATAL)
175 << "A RNN state array, " << array_name << ", still does not "
176 << "have a known data type after all graph transformations have "
177 << "run.";
178 }
179 }
180 LOG(FATAL) << "An array, " << array_name << ", still does not "
181 << "have a known data type after all graph transformations have "
182 << "run.";
183 }
184 const std::size_t elem_size = ElementSize(array->data_type);
185 const std::size_t raw_size =
186 elem_size * RequiredBufferSizeForShape(array->shape());
187 const std::size_t rounded_size =
188 RoundUpToNextMultipleOf(raw_size, transient_data_alignment);
189 return rounded_size;
190 }
191
192 // Allocates an array: call this for every array just before the first
193 // op where it is used.
AllocateTransientArray(const Model & model,const string & array_name,Allocator * allocator,std::size_t transient_data_alignment)194 void AllocateTransientArray(const Model& model, const string& array_name,
195 Allocator* allocator,
196 std::size_t transient_data_alignment) {
197 if (!IsAllocatableTransientArray(model, array_name)) {
198 return;
199 }
200 const std::size_t size =
201 TransientArraySize(model, array_name, transient_data_alignment);
202 const auto& array = &model.GetArray(array_name);
203 CHECK(!array->alloc);
204 allocator->Allocate(size, &array->GetOrCreateAlloc());
205 }
206
207 // Deallocates an array: call this for every array just after the last
208 // op where it is used.
DeallocateTransientArray(const Model & model,const string & array_name,Allocator * allocator)209 void DeallocateTransientArray(const Model& model, const string& array_name,
210 Allocator* allocator) {
211 if (!IsAllocatableTransientArray(model, array_name)) {
212 return;
213 }
214 const auto& array = &model.GetArray(array_name);
215 CHECK(!!array->alloc);
216 allocator->Deallocate(*array->alloc);
217 }
218
PushBackIfNotFound(const string & s,std::vector<string> * v)219 void PushBackIfNotFound(const string& s, std::vector<string>* v) {
220 if (std::find(v->begin(), v->end(), s) == v->end()) {
221 v->push_back(s);
222 }
223 }
224
225 } // namespace
226
AllocateTransientArrays(Model * model,std::size_t transient_data_alignment)227 void AllocateTransientArrays(Model* model,
228 std::size_t transient_data_alignment) {
229 // Precompute the lifespans for all arrays.
230 std::unordered_map<string, ArrayLifespan> array_lifespans;
231 ComputeArrayLifespans(*model, &array_lifespans);
232
233 // In case of variable batch, our convention will be to compute the
234 // allocations for batch==1, then let the inference code multiply all
235 // the offsets by the actual runtime batch size. Conveniently,
236 // the variable_batch and batch flags are mutually exclusive, and the default
237 // value of batch is 1, so we have nothing special to do here. Let us
238 // just guard this assumption with a CHECK:
239 bool batchless_input_shapes = true;
240 for (const auto& input_array : model->flags.input_arrays()) {
241 if (!input_array.has_shape() || input_array.shape().dims().empty() ||
242 input_array.shape().dims(0) != 1) {
243 batchless_input_shapes = false;
244 break;
245 }
246 }
247 CHECK(!model->flags.variable_batch() || batchless_input_shapes);
248
249 Allocator allocator;
250
251 // Construct a sorted map of array names, so that other layout engines can
252 // match exactly.
253 std::map<string, const Array*> ordered_arrays_map;
254 for (const auto& pair : model->GetArrayMap()) {
255 ordered_arrays_map[pair.first] = pair.second.get();
256 }
257
258 // Allocate persistent arrays (like RNN states). For them, 'transient'
259 // is a misnormer, should read 'workspace'.
260 for (const auto& array_pair : ordered_arrays_map) {
261 const string& array_name = array_pair.first;
262 auto it = array_lifespans.find(array_name);
263 if (it != array_lifespans.end() && it->second.persistent) {
264 AllocateTransientArray(*model, array_name, &allocator,
265 transient_data_alignment);
266 }
267 }
268
269 for (std::size_t op_index = 0; op_index < model->operators.size();
270 op_index++) {
271 const auto& op = model->operators[op_index];
272 // Allocate those arrays whose lifespan starts exactly here.
273 std::vector<string> arrays_to_allocate;
274 for (const auto& input : op->inputs) {
275 if (StartsAt(array_lifespans[input], op_index)) {
276 PushBackIfNotFound(input, &arrays_to_allocate);
277 }
278 }
279 for (const auto& output : op->outputs) {
280 if (StartsAt(array_lifespans[output], op_index)) {
281 PushBackIfNotFound(output, &arrays_to_allocate);
282 }
283 }
284 for (const string& array : arrays_to_allocate) {
285 AllocateTransientArray(*model, array, &allocator,
286 transient_data_alignment);
287 }
288
289 // Deallocate those arrays whose lifespan ends exactly here.
290 std::vector<string> arrays_to_deallocate;
291 for (const auto& input : op->inputs) {
292 if (EndsAt(array_lifespans[input], op_index)) {
293 PushBackIfNotFound(input, &arrays_to_deallocate);
294 }
295 }
296 for (const auto& output : op->outputs) {
297 if (EndsAt(array_lifespans[output], op_index)) {
298 PushBackIfNotFound(output, &arrays_to_deallocate);
299 }
300 }
301 for (const string& array : arrays_to_deallocate) {
302 DeallocateTransientArray(*model, array, &allocator);
303 }
304 }
305
306 // Just out of curiosity (not used in the actual allocation process)
307 // evaluate the optimal total allocated size.
308 // First, compute the size of persistent arrays.
309 std::size_t optimal_transient_alloc_size = 0;
310 std::size_t persistent_alloc_size = 0;
311 for (const auto& array_pair : ordered_arrays_map) {
312 const string& array_name = array_pair.first;
313 auto it = array_lifespans.find(array_name);
314 if (it != array_lifespans.end() && it->second.persistent) {
315 persistent_alloc_size +=
316 TransientArraySize(*model, array_name, transient_data_alignment);
317 }
318 }
319 for (const auto& op : model->operators) {
320 // for each operator, compute the sum of the sizes of the array that must
321 // be live during the execution of this operator, plus the size of
322 // persistent arrays that must be live at all times.
323 std::vector<string> non_persistent_edges;
324 for (const auto& input : op->inputs) {
325 if (!array_lifespans[input].persistent) {
326 PushBackIfNotFound(input, &non_persistent_edges);
327 }
328 }
329 for (const auto& output : op->outputs) {
330 if (!array_lifespans[output].persistent) {
331 PushBackIfNotFound(output, &non_persistent_edges);
332 }
333 }
334 std::size_t size = persistent_alloc_size;
335 for (const string& edge : non_persistent_edges) {
336 size += TransientArraySize(*model, edge, transient_data_alignment);
337 }
338 // The optimal total size is the maximum of all operator-specific sizes.
339 optimal_transient_alloc_size = std::max(optimal_transient_alloc_size, size);
340 }
341
342 model->transient_data_size = allocator.total_size();
343 model->transient_data_alignment = transient_data_alignment;
344 CHECK_GE(model->transient_data_size, optimal_transient_alloc_size);
345 LOG(INFO) << "Total transient array allocated size: "
346 << model->transient_data_size << " bytes, "
347 << "theoretical optimal value: " << optimal_transient_alloc_size
348 << " bytes.";
349 CheckInvariants(*model);
350 }
351 } // namespace toco
352