1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 #include <stdbool.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 
11 #include <xnnpack/memory-planner.h>
12 #include <xnnpack/subgraph.h>
13 
14 // Check if two xnn_value's lifecycles overlap.
value_lifecycle_overlap(const struct xnn_value_usage * a,const struct xnn_value_usage * b)15 inline static bool value_lifecycle_overlap(const struct xnn_value_usage* a, const struct xnn_value_usage* b) {
16   assert(a->last_node >= a->first_node);
17   assert(b->last_node >= b->first_node);
18   if (a->first_node < b->first_node) {
19     return a->last_node >= b->first_node;
20   } else {
21     return b->last_node >= a->first_node;
22   }
23 }
24 
25 // Use this comparison function to sort xnn_value_usage according to the
26 // tensor_size in decreasing order.
cmp_value_usage_tensor_size(const void * a,const void * b)27 static inline int cmp_value_usage_tensor_size(const void* a, const void* b) {
28   const size_t tensor_size_a = (*(struct xnn_value_usage**)a)->tensor_size;
29   const size_t tensor_size_b = (*(struct xnn_value_usage**)b)->tensor_size;
30   return (tensor_size_b > tensor_size_a) - (tensor_size_b < tensor_size_a);
31 }
32 
populate_value_lifecycle(const xnn_subgraph_t subgraph,struct xnn_value_usage * usage)33 static void populate_value_lifecycle(const xnn_subgraph_t subgraph, struct xnn_value_usage* usage) {
34   assert(subgraph != NULL);
35   if (subgraph->num_nodes == 0) {
36     return;
37   }
38   // As we initialized first/last_node in each xnn_value_usage to 0 as in 'xnn_init_value_mem_allocation_tracker',
39   // we start with the second node to tell whether first/last_node have been set or not, and check the first node last.
40   for (uint32_t nid = 1; nid < subgraph->num_nodes; ++nid) {
41     const struct xnn_node* node = subgraph->nodes + nid;
42     for (uint32_t i = 0; i < node->num_inputs; ++i) {
43       if (usage[node->inputs[i]].first_node == 0) {
44         usage[node->inputs[i]].first_node = nid;
45       }
46       usage[node->inputs[i]].last_node = nid;
47     }
48     for (uint32_t i = 0; i < node->num_outputs; ++i) {
49       if (usage[node->outputs[i]].first_node == 0) {
50         usage[node->outputs[i]].first_node = nid;
51       }
52       usage[node->outputs[i]].last_node = nid;
53     }
54   }
55   const struct xnn_node* first_node = subgraph->nodes;
56   for (uint32_t i = 0; i < first_node->num_inputs; ++i) {
57     usage[first_node->inputs[i]].first_node = 0;
58   }
59   for (uint32_t i = 0; i < first_node->num_outputs; ++i) {
60     usage[first_node->outputs[i]].first_node = 0;
61   }
62 }
63 
// Represent a memory block [start, end)
struct memory_block {
  // Inclusive lower bound: the offset at which the block begins.
  size_t start;
  // Exclusive upper bound: the first offset past the block.
  size_t end;
};
69 
70 // Use this comparison function to sort memory_block according to the 'start'
71 // in increasing order.
cmp_memory_block(const void * a,const void * b)72 static inline int cmp_memory_block(const void* a, const void* b) {
73   const size_t start_a = ((struct memory_block*)a)->start;
74   const size_t start_b = ((struct memory_block*)b)->start;
75   return (start_a > start_b) - (start_a < start_b);
76 }
77 
78 // Given the current live memory blocks, return the offset in a memory arena for a to-be-allocated value of size
79 // 'to_alloc_size'.
find_value_alloc_offset(struct memory_block * live_mem_blocks,size_t num_mem_blocks,size_t to_alloc_size)80 static size_t find_value_alloc_offset(struct memory_block* live_mem_blocks,
81                                       size_t num_mem_blocks,
82                                       size_t to_alloc_size) {
83   if (num_mem_blocks == 0) {
84     return 0;
85   }
86 
87   if (num_mem_blocks == 1) {
88     return live_mem_blocks[0].end;
89   }
90 
91   // Sort memory blocks according to 'start' in increasing order.
92   qsort(live_mem_blocks, num_mem_blocks, sizeof(struct memory_block), cmp_memory_block);
93 
94   // Coalesce overlapping or immediate adjacent memory blocks to form a list of non-overlapping memory blocks in order
95   // to find the smallest gap.
96   size_t num_coalesced_mem_blocks = 1;
97   for (size_t i = 1; i < num_mem_blocks; ++i) {
98     const size_t current_coalesced_end =
99         live_mem_blocks[num_coalesced_mem_blocks - 1].end;
100     if (live_mem_blocks[i].start > current_coalesced_end) {
101       assert(num_coalesced_mem_blocks <= i);
102       live_mem_blocks[num_coalesced_mem_blocks] = live_mem_blocks[i];
103       num_coalesced_mem_blocks++;
104       continue;
105     }
106     if (live_mem_blocks[i].end > current_coalesced_end) {
107       live_mem_blocks[num_coalesced_mem_blocks - 1].end = live_mem_blocks[i].end;
108     }
109   }
110 
111   size_t smallest_gap_size = SIZE_MAX;
112   // The first index to live_mem_blocks that the 'to_alloc_size' should be allocated after.
113   size_t smallest_gap_index = num_coalesced_mem_blocks - 1;
114   for (size_t i = 0; i < num_coalesced_mem_blocks - 1; ++i) {
115     assert(live_mem_blocks[i + 1].start > live_mem_blocks[i].end);
116     const size_t gap = live_mem_blocks[i + 1].start - live_mem_blocks[i].end;
117     if (gap >= to_alloc_size && gap < smallest_gap_size) {
118       smallest_gap_index = i;
119       smallest_gap_size = gap;
120     }
121   }
122   return live_mem_blocks[smallest_gap_index].end;
123 }
124 
xnn_init_value_allocation_tracker(struct xnn_value_allocation_tracker * tracker,const xnn_subgraph_t subgraph)125 void xnn_init_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker, const xnn_subgraph_t subgraph) {
126   tracker->subgraph = subgraph;
127   tracker->mem_arena_size = 0;
128   tracker->usage = xnn_allocate_zero_memory(sizeof(struct xnn_value_usage) * subgraph->num_values);
129 #if XNN_ENABLE_MEMOPT
130   populate_value_lifecycle(tracker->subgraph, tracker->usage);
131 #endif
132   tracker->min_value_id = XNN_INVALID_VALUE_ID;
133   tracker->max_value_id = XNN_INVALID_VALUE_ID;
134 }
135 
xnn_add_value_allocation_tracker(struct xnn_value_allocation_tracker * tracker,uint32_t value_id,size_t tensor_size)136 void xnn_add_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker,
137                                       uint32_t value_id,
138                                       size_t tensor_size) {
139   tracker->usage[value_id].tensor_size = tensor_size;
140   if (tracker->min_value_id == XNN_INVALID_VALUE_ID) {
141     tracker->min_value_id = value_id;
142   } else {
143     // Note that values are expected to be added in increasing order.
144     assert(value_id > tracker->min_value_id);
145     assert(value_id > tracker->max_value_id);
146   }
147 
148   tracker->max_value_id = value_id;
149 }
150 
xnn_plan_value_allocation_tracker(struct xnn_value_allocation_tracker * tracker)151 void xnn_plan_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker) {
152 #if XNN_ENABLE_MEMOPT
153   if (tracker->min_value_id == XNN_INVALID_VALUE_ID) {
154     assert(tracker->max_value_id == XNN_INVALID_VALUE_ID);
155     return;
156   }
157 
158   const uint32_t num_values = tracker->max_value_id - tracker->min_value_id + 1;
159   struct xnn_value_usage** sorted_usage = xnn_allocate_zero_memory(sizeof(struct xnn_value_usage*) * num_values);
160   size_t num_values_to_alloc = 0;
161   for (size_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) {
162     struct xnn_value_usage* info = tracker->usage + i;
163     if (info->tensor_size != 0) {
164       sorted_usage[num_values_to_alloc++] = info;
165     }
166   }
167   qsort(sorted_usage, num_values_to_alloc, sizeof(struct xnn_value_usage*), cmp_value_usage_tensor_size);
168 
169   // Start the allocation planning process.
170   struct memory_block* current_live_mem_blocks = xnn_allocate_zero_memory(
171       sizeof(struct memory_block) * num_values_to_alloc);
172   size_t mem_arena_size = 0;
173   for (size_t i = 0; i < num_values_to_alloc; ++i) {
174     size_t num_live_mem_blocks = 0;
175     struct xnn_value_usage* current = sorted_usage[i];
176     for (size_t j = 0; j < i; ++j) {
177       const struct xnn_value_usage* allocated = sorted_usage[j];
178       if (value_lifecycle_overlap(current, allocated)) {
179         current_live_mem_blocks[num_live_mem_blocks++] = (struct memory_block){
180             .start = allocated->alloc_offset,
181             .end = allocated->alloc_offset + allocated->tensor_size,
182         };
183       }
184     }
185     current->alloc_offset = find_value_alloc_offset(current_live_mem_blocks, num_live_mem_blocks, current->tensor_size);
186     if (mem_arena_size < current->alloc_offset + current->tensor_size) {
187       mem_arena_size = current->alloc_offset + current->tensor_size;
188     }
189   }
190 
191   tracker->mem_arena_size = mem_arena_size;
192   xnn_release_memory(sorted_usage);
193   xnn_release_memory(current_live_mem_blocks);
194 #else
195   tracker->mem_arena_size = 0;
196   for (uint32_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) {
197     if (tracker->usage[i].tensor_size > 0) {
198       tracker->usage[i].alloc_offset = tracker->mem_arena_size;
199       tracker->mem_arena_size += tracker->usage[i].tensor_size;
200     }
201   }
202 #endif
203 }
204