1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h"
17 
18 #include <algorithm>
19 #include <cstddef>
20 #include <limits>
21 #include <queue>
22 #include <utility>
23 #include <vector>
24 
25 #include "absl/status/status.h"
26 #include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
27 #include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
28 
29 namespace tflite {
30 namespace gpu {
31 namespace {
32 
33 // This class build flow graph and solves Minimum-cost flow problem in it.
34 class MinCostFlowSolver {
35  public:
36   // Build auxiliary flow graph, based on information about intermediate
37   // tensors.
Build(const std::vector<TensorUsageRecord<size_t>> & usage_records)38   void Build(const std::vector<TensorUsageRecord<size_t>>& usage_records) {
39     usage_records_ = &usage_records;
40     num_tensors_ = usage_records.size();
41     source_ = 2 * num_tensors_;
42     sink_ = source_ + 1;
43     edges_from_.resize(sink_ + 1);
44     std::vector<size_t> old_record_ids;
45     std::priority_queue<QueueRecord> objects_in_use;
46     for (size_t i = 0; i < usage_records.size(); i++) {
47       // Pop from the queue all objects that are no longer in use at the time
48       // of execution of the first_task of i-th intermediate tensor.
49       while (!objects_in_use.empty() &&
50              objects_in_use.top().last_task < usage_records[i].first_task) {
51         old_record_ids.push_back(objects_in_use.top().object_id);
52         objects_in_use.pop();
53       }
54       objects_in_use.push({usage_records[i].last_task, i});
55       AddEdge(source_, i, 1, 0);
56       AddEdge(RightPartTwin(i), sink_, 1, 0);
57 
58       // Edge from source_ to i-th vertex in the right part of flow graph
59       // are added for the case of allocation of new shared object for i-th
60       // tensor. Cost of these edges is equal to the size of i-th tensor.
61       AddEdge(source_, RightPartTwin(i), 1, usage_records[i].tensor_size);
62 
63       // Edges from vertices of the left part of flow graph, corresponding to
64       // old_record_ids, to i-th vertex in the right part of flow graph are
65       // added for the case of reusing previously created shared objects for
66       // i-th tensor. Cost of these edges is an approximation of the size of
67       // new allocated memory.
68       for (auto record_id : old_record_ids) {
69         int cost = 0;
70         if (usage_records[i].tensor_size >
71             usage_records[record_id].tensor_size) {
72           cost = usage_records[i].tensor_size -
73                  usage_records[record_id].tensor_size;
74         }
75         AddEdge(record_id, RightPartTwin(i), 1, cost);
76       }
77     }
78   }
79 
80   // Solve Minimum-cost flow problem with Shortest Path Faster Algorithm.
Solve()81   void Solve() {
82     const int kInf = std::numeric_limits<int>::max();
83     std::vector<size_t> prev_edge(sink_ + 1);
84     while (true) {
85       std::queue<size_t> cur_queue, next_queue;
86       std::vector<size_t> last_it_in_queue(sink_ + 1);
87       std::vector<size_t> dist(sink_ + 1, kInf);
88       size_t it = 1;
89       cur_queue.push(source_);
90       last_it_in_queue[source_] = it;
91       dist[source_] = 0;
92       // Find shortest path from source_ to sink_, using only edges with
93       // positive capacity.
94       while (!cur_queue.empty()) {
95         ++it;
96         while (!cur_queue.empty()) {
97           auto v = cur_queue.front();
98           cur_queue.pop();
99           for (const auto& edge_id : edges_from_[v]) {
100             const Edge& edge = edges_[edge_id];
101             if (edge.cap > 0) {
102               auto u = edge.dst;
103               int new_dist = dist[v] + edge.cost;
104               if (new_dist < dist[u]) {
105                 dist[u] = new_dist;
106                 prev_edge[u] = edge_id;
107                 if (last_it_in_queue[u] != it) {
108                   next_queue.push(u);
109                   last_it_in_queue[u] = it;
110                 }
111               }
112             }
113           }
114         }
115         std::swap(cur_queue, next_queue);
116       }
117       // If path is not found, final result is ready.
118       if (dist[sink_] == kInf) break;
119 
120       // If path is found, we need to decrease the capacity of its edges, and
121       // increase the capacity of its reversed edges.
122       for (size_t v = sink_; v != source_;) {
123         --edges_[prev_edge[v]].cap;
124         Edge& rev_edge = edges_[prev_edge[v] ^ 1];
125         ++rev_edge.cap;
126         v = rev_edge.dst;
127       }
128     }
129   }
130 
CalculateAssignment(ObjectsAssignment<size_t> * assignment)131   void CalculateAssignment(ObjectsAssignment<size_t>* assignment) {
132     assignment->object_sizes.clear();
133     assignment->object_ids.assign(num_tensors_, kNotAssigned);
134     is_tensor_assigned_.resize(num_tensors_);
135     for (const auto& edge_id : edges_from_[source_]) {
136       const Edge& edge = edges_[edge_id];
137       if (edge.cap == 0 && IsRightPartVertex(edge.dst)) {
138         assignment->object_sizes.push_back(
139             AssignTensorsToNewSharedObject(LeftPartTwin(edge.dst), assignment));
140       }
141     }
142   }
143 
144  private:
145   struct Edge {
Edgetflite::gpu::__anondc65f6650111::MinCostFlowSolver::Edge146     Edge(size_t dst, int cap, int cost) : dst(dst), cap(cap), cost(cost) {}
147 
148     size_t dst;
149     int cap;
150     int cost;
151   };
152 
153   // Add edge from vertex src to vertex dst with given capacity and cost and
154   // its reversed edge to the flow graph. If some edge has index idx, its
155   // reversed edge has index idx^1.
AddEdge(size_t src,size_t dst,int cap,int cost)156   void AddEdge(size_t src, size_t dst, int cap, int cost) {
157     edges_from_[src].push_back(edges_.size());
158     edges_.emplace_back(dst, cap, cost);
159     edges_from_[dst].push_back(edges_.size());
160     edges_.push_back({src, 0, -cost});
161   }
162 
163   // Check, if vertex_id belongs to right part of the flow graph.
IsRightPartVertex(size_t vertex_id) const164   bool IsRightPartVertex(size_t vertex_id) const {
165     return vertex_id >= num_tensors_ && vertex_id < 2 * num_tensors_;
166   }
167 
168   // Return vertex from another part of the graph, that corresponds to the
169   // same intermediate tensor.
LeftPartTwin(size_t vertex_id) const170   size_t LeftPartTwin(size_t vertex_id) const {
171     return vertex_id - num_tensors_;
172   }
RightPartTwin(size_t vertex_id) const173   size_t RightPartTwin(size_t vertex_id) const {
174     return vertex_id + num_tensors_;
175   }
176 
177   // This function uses recursive implementation of depth-first search and
178   // returns maximum size from tensor tensor_id and all tensors, that will be
179   // allocated at the same place with it after all operations that use
180   // tensor_id are executed. Next tensor to be allocated at the same place
181   // with tensor_id is a left part twin of such vertex v, that the edge
182   // tensor_id->v is saturated (has zero residual capacity).
AssignTensorsToNewSharedObject(size_t tensor_id,ObjectsAssignment<size_t> * assignment)183   size_t AssignTensorsToNewSharedObject(size_t tensor_id,
184                                         ObjectsAssignment<size_t>* assignment) {
185     size_t cost = (*usage_records_)[tensor_id].tensor_size;
186     is_tensor_assigned_[tensor_id] = true;
187     assignment->object_ids[tensor_id] = assignment->object_sizes.size();
188     for (const auto& edge_id : edges_from_[tensor_id]) {
189       const Edge& edge = edges_[edge_id];
190       size_t v = edge.dst;
191       size_t left_twin = LeftPartTwin(v);
192       if (edge.cap == 0 && IsRightPartVertex(v) &&
193           !is_tensor_assigned_[left_twin]) {
194         cost = std::max(cost,
195                         AssignTensorsToNewSharedObject(left_twin, assignment));
196       }
197     }
198     return cost;
199   }
200 
201   size_t source_;
202   size_t sink_;
203   size_t num_tensors_;
204   const std::vector<TensorUsageRecord<size_t>>* usage_records_;
205   std::vector<Edge> edges_;
206   std::vector<std::vector<size_t>> edges_from_;
207   std::vector<bool> is_tensor_assigned_;
208 };
209 
210 }  // namespace
211 
212 // Implements memory management with a Minimum-cost flow matching algorithm.
213 //
214 // The problem of memory management is NP-complete. This function creates
215 // auxiliary flow graph, find minimum-cost flow in it and calculates the
216 // assignment of shared objects to tensors, using the result of the flow
217 // algorithm.
MinCostFlowAssignment(const std::vector<TensorUsageRecord<size_t>> & usage_records,ObjectsAssignment<size_t> * assignment)218 absl::Status MinCostFlowAssignment(
219     const std::vector<TensorUsageRecord<size_t>>& usage_records,
220     ObjectsAssignment<size_t>* assignment) {
221   MinCostFlowSolver solver;
222   solver.Build(usage_records);
223   solver.Solve();
224   solver.CalculateAssignment(assignment);
225   return absl::OkStatus();
226 }
227 
228 }  // namespace gpu
229 }  // namespace tflite
230