1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h"
17
18 #include <algorithm>
19 #include <cstddef>
20 #include <limits>
21 #include <queue>
22 #include <utility>
23 #include <vector>
24
25 #include "absl/status/status.h"
26 #include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
27 #include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
28
29 namespace tflite {
30 namespace gpu {
31 namespace {
32
33 // This class build flow graph and solves Minimum-cost flow problem in it.
34 class MinCostFlowSolver {
35 public:
36 // Build auxiliary flow graph, based on information about intermediate
37 // tensors.
Build(const std::vector<TensorUsageRecord<size_t>> & usage_records)38 void Build(const std::vector<TensorUsageRecord<size_t>>& usage_records) {
39 usage_records_ = &usage_records;
40 num_tensors_ = usage_records.size();
41 source_ = 2 * num_tensors_;
42 sink_ = source_ + 1;
43 edges_from_.resize(sink_ + 1);
44 std::vector<size_t> old_record_ids;
45 std::priority_queue<QueueRecord> objects_in_use;
46 for (size_t i = 0; i < usage_records.size(); i++) {
47 // Pop from the queue all objects that are no longer in use at the time
48 // of execution of the first_task of i-th intermediate tensor.
49 while (!objects_in_use.empty() &&
50 objects_in_use.top().last_task < usage_records[i].first_task) {
51 old_record_ids.push_back(objects_in_use.top().object_id);
52 objects_in_use.pop();
53 }
54 objects_in_use.push({usage_records[i].last_task, i});
55 AddEdge(source_, i, 1, 0);
56 AddEdge(RightPartTwin(i), sink_, 1, 0);
57
58 // Edge from source_ to i-th vertex in the right part of flow graph
59 // are added for the case of allocation of new shared object for i-th
60 // tensor. Cost of these edges is equal to the size of i-th tensor.
61 AddEdge(source_, RightPartTwin(i), 1, usage_records[i].tensor_size);
62
63 // Edges from vertices of the left part of flow graph, corresponding to
64 // old_record_ids, to i-th vertex in the right part of flow graph are
65 // added for the case of reusing previously created shared objects for
66 // i-th tensor. Cost of these edges is an approximation of the size of
67 // new allocated memory.
68 for (auto record_id : old_record_ids) {
69 int cost = 0;
70 if (usage_records[i].tensor_size >
71 usage_records[record_id].tensor_size) {
72 cost = usage_records[i].tensor_size -
73 usage_records[record_id].tensor_size;
74 }
75 AddEdge(record_id, RightPartTwin(i), 1, cost);
76 }
77 }
78 }
79
80 // Solve Minimum-cost flow problem with Shortest Path Faster Algorithm.
Solve()81 void Solve() {
82 const int kInf = std::numeric_limits<int>::max();
83 std::vector<size_t> prev_edge(sink_ + 1);
84 while (true) {
85 std::queue<size_t> cur_queue, next_queue;
86 std::vector<size_t> last_it_in_queue(sink_ + 1);
87 std::vector<size_t> dist(sink_ + 1, kInf);
88 size_t it = 1;
89 cur_queue.push(source_);
90 last_it_in_queue[source_] = it;
91 dist[source_] = 0;
92 // Find shortest path from source_ to sink_, using only edges with
93 // positive capacity.
94 while (!cur_queue.empty()) {
95 ++it;
96 while (!cur_queue.empty()) {
97 auto v = cur_queue.front();
98 cur_queue.pop();
99 for (const auto& edge_id : edges_from_[v]) {
100 const Edge& edge = edges_[edge_id];
101 if (edge.cap > 0) {
102 auto u = edge.dst;
103 int new_dist = dist[v] + edge.cost;
104 if (new_dist < dist[u]) {
105 dist[u] = new_dist;
106 prev_edge[u] = edge_id;
107 if (last_it_in_queue[u] != it) {
108 next_queue.push(u);
109 last_it_in_queue[u] = it;
110 }
111 }
112 }
113 }
114 }
115 std::swap(cur_queue, next_queue);
116 }
117 // If path is not found, final result is ready.
118 if (dist[sink_] == kInf) break;
119
120 // If path is found, we need to decrease the capacity of its edges, and
121 // increase the capacity of its reversed edges.
122 for (size_t v = sink_; v != source_;) {
123 --edges_[prev_edge[v]].cap;
124 Edge& rev_edge = edges_[prev_edge[v] ^ 1];
125 ++rev_edge.cap;
126 v = rev_edge.dst;
127 }
128 }
129 }
130
CalculateAssignment(ObjectsAssignment<size_t> * assignment)131 void CalculateAssignment(ObjectsAssignment<size_t>* assignment) {
132 assignment->object_sizes.clear();
133 assignment->object_ids.assign(num_tensors_, kNotAssigned);
134 is_tensor_assigned_.resize(num_tensors_);
135 for (const auto& edge_id : edges_from_[source_]) {
136 const Edge& edge = edges_[edge_id];
137 if (edge.cap == 0 && IsRightPartVertex(edge.dst)) {
138 assignment->object_sizes.push_back(
139 AssignTensorsToNewSharedObject(LeftPartTwin(edge.dst), assignment));
140 }
141 }
142 }
143
144 private:
145 struct Edge {
Edgetflite::gpu::__anondc65f6650111::MinCostFlowSolver::Edge146 Edge(size_t dst, int cap, int cost) : dst(dst), cap(cap), cost(cost) {}
147
148 size_t dst;
149 int cap;
150 int cost;
151 };
152
153 // Add edge from vertex src to vertex dst with given capacity and cost and
154 // its reversed edge to the flow graph. If some edge has index idx, its
155 // reversed edge has index idx^1.
AddEdge(size_t src,size_t dst,int cap,int cost)156 void AddEdge(size_t src, size_t dst, int cap, int cost) {
157 edges_from_[src].push_back(edges_.size());
158 edges_.emplace_back(dst, cap, cost);
159 edges_from_[dst].push_back(edges_.size());
160 edges_.push_back({src, 0, -cost});
161 }
162
163 // Check, if vertex_id belongs to right part of the flow graph.
IsRightPartVertex(size_t vertex_id) const164 bool IsRightPartVertex(size_t vertex_id) const {
165 return vertex_id >= num_tensors_ && vertex_id < 2 * num_tensors_;
166 }
167
168 // Return vertex from another part of the graph, that corresponds to the
169 // same intermediate tensor.
LeftPartTwin(size_t vertex_id) const170 size_t LeftPartTwin(size_t vertex_id) const {
171 return vertex_id - num_tensors_;
172 }
RightPartTwin(size_t vertex_id) const173 size_t RightPartTwin(size_t vertex_id) const {
174 return vertex_id + num_tensors_;
175 }
176
177 // This function uses recursive implementation of depth-first search and
178 // returns maximum size from tensor tensor_id and all tensors, that will be
179 // allocated at the same place with it after all operations that use
180 // tensor_id are executed. Next tensor to be allocated at the same place
181 // with tensor_id is a left part twin of such vertex v, that the edge
182 // tensor_id->v is saturated (has zero residual capacity).
AssignTensorsToNewSharedObject(size_t tensor_id,ObjectsAssignment<size_t> * assignment)183 size_t AssignTensorsToNewSharedObject(size_t tensor_id,
184 ObjectsAssignment<size_t>* assignment) {
185 size_t cost = (*usage_records_)[tensor_id].tensor_size;
186 is_tensor_assigned_[tensor_id] = true;
187 assignment->object_ids[tensor_id] = assignment->object_sizes.size();
188 for (const auto& edge_id : edges_from_[tensor_id]) {
189 const Edge& edge = edges_[edge_id];
190 size_t v = edge.dst;
191 size_t left_twin = LeftPartTwin(v);
192 if (edge.cap == 0 && IsRightPartVertex(v) &&
193 !is_tensor_assigned_[left_twin]) {
194 cost = std::max(cost,
195 AssignTensorsToNewSharedObject(left_twin, assignment));
196 }
197 }
198 return cost;
199 }
200
201 size_t source_;
202 size_t sink_;
203 size_t num_tensors_;
204 const std::vector<TensorUsageRecord<size_t>>* usage_records_;
205 std::vector<Edge> edges_;
206 std::vector<std::vector<size_t>> edges_from_;
207 std::vector<bool> is_tensor_assigned_;
208 };
209
210 } // namespace
211
212 // Implements memory management with a Minimum-cost flow matching algorithm.
213 //
214 // The problem of memory management is NP-complete. This function creates
215 // auxiliary flow graph, find minimum-cost flow in it and calculates the
216 // assignment of shared objects to tensors, using the result of the flow
217 // algorithm.
MinCostFlowAssignment(const std::vector<TensorUsageRecord<size_t>> & usage_records,ObjectsAssignment<size_t> * assignment)218 absl::Status MinCostFlowAssignment(
219 const std::vector<TensorUsageRecord<size_t>>& usage_records,
220 ObjectsAssignment<size_t>* assignment) {
221 MinCostFlowSolver solver;
222 solver.Build(usage_records);
223 solver.Solve();
224 solver.CalculateAssignment(assignment);
225 return absl::OkStatus();
226 }
227
228 } // namespace gpu
229 } // namespace tflite
230