/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */

//
//
//

#include <assert.h>
#include <memory.h>

#include "runtime_cl_12.h"
#include "scheduler.h"

//
//
//

#ifndef NDEBUG

#include <stdio.h>

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)         \
  fprintf(stderr,                                                       \
          "suballocator %s : [ %4u ] : alloc( %9u ) @ %4u = %u\n",      \
          suballocator->name,                                           \
          suballocator->rem.avail,                                      \
          (skc_uint)ss,                                                 \
          subbuf_id,                                                    \
          (skc_uint)suballocator->total);

#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)          \
  fprintf(stderr,                                                       \
          "suballocator %s : [ %4u ] : free ( %9u ) @ %4u = %u\n",      \
          suballocator->name,                                           \
          suballocator->rem.avail,                                      \
          (skc_uint)ss,                                                 \
          subbuf_id,                                                    \
          (skc_uint)suballocator->total);

#else

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)
#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)

#endif
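
//
// a debug trace line looks like this (name and values illustrative):
//
//   suballocator HOST : [    3 ] : alloc(      4096 ) @    7 = 12288
//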

//
//
//

void
skc_suballocator_create(struct skc_runtime      * const runtime,
                        struct skc_suballocator * const suballocator,
                        char              const * const name,
                        skc_uint                  const subbufs,
                        size_t                    const align,
                        size_t                    const size)
{
  size_t const subbufs_size = sizeof(*suballocator->subbufs) * subbufs;

  // allocate array of subbuf records
  suballocator->subbufs = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,subbufs_size);

  // zero subbufs
  memset(suballocator->subbufs,0,subbufs_size);

  // initialize starting subbuf
  suballocator->subbufs[0].size = (skc_subbuf_size_t)size;

  // allocate array of ids
  suballocator->ids = skc_runtime_host_perm_alloc(runtime,
                                                  SKC_MEM_FLAGS_READ_WRITE,
                                                  sizeof(*suballocator->ids) * subbufs);
  for (skc_uint ii=0; ii<subbufs; ii++)
    suballocator->ids[ii] = ii;

  suballocator->rem.avail = 1;
  suballocator->rem.spare = subbufs - 1;

  suballocator->align     = (skc_uint)align;
  suballocator->count     = subbufs;

  suballocator->size      = (skc_subbuf_size_t)size;
  suballocator->total     = 0;

  suballocator->name      = name;
}

void
skc_suballocator_dispose(struct skc_runtime      * const runtime,
                         struct skc_suballocator * const suballocator)
{
  skc_runtime_host_perm_free(runtime,suballocator->ids);
  skc_runtime_host_perm_free(runtime,suballocator->subbufs);
}

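//
// A minimal usage sketch of create/dispose -- illustrative only: the
// example function, name and sizing constants below are assumptions,
// not part of this file's API:
//
#if 0
void
skc_suballocator_example(struct skc_runtime * const runtime)
{
  struct skc_suballocator suballocator;

  // e.g. 128 subbuf records carving up a 64 MB extent, 4 KB aligned
  skc_suballocator_create(runtime,&suballocator,"EXAMPLE",128,4096,64u<<20);

  // ... subbuf_alloc() / subbuf_free() here ...

  skc_suballocator_dispose(runtime,&suballocator);
}
#endif
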
//
// Sets id and returns origin
//

size_t
skc_suballocator_subbuf_alloc(struct skc_suballocator * const suballocator,
                              struct skc_scheduler    * const scheduler,
                              size_t                    const size,
                              skc_subbuf_id_t         * const subbuf_id,
                              size_t                  * const subbuf_size)
{
  //
  // Note that we can't deadlock here because everything allocated is
  // expected to be freed within msecs.  Worst case, we wait for the
  // availability of resources while a fully utilized GPU is making
  // forward progress on kernels.
  //
  // This behavior should guide the sizing of the suballocator's
  // number of subbuffers and extent.
  //
  // We want to allocate a large enough extent and enough subbuffer
  // records so that the CPU/GPU is never starved.
  //

  // round up the size
  skc_subbuf_size_t const size_ru = (skc_subbuf_size_t)SKC_ROUND_UP_POW2(size,suballocator->align);

  // save it
  if (subbuf_size != NULL)
    *subbuf_size = size_ru;

  //
  // We precheck to see that there is at least one region of memory
  // available but do not check to see if there is a spare.  Instead,
  // we simply keep looking for an exact fit.
  //
  skc_subbuf_id_t * const ids = suballocator->ids;

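  //
  // id array layout: available subbuf ids occupy ids[0, rem.avail)
  // and spare (unlinked) ids occupy ids[count - rem.spare, count)
  //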
  while (true)
    {
      skc_uint avail_rem = suballocator->rem.avail;
      skc_uint spare_rem = suballocator->rem.spare;

      for (skc_uint avail_idx=0; avail_idx<avail_rem; avail_idx++)
        {
          skc_subbuf_id_t     const avail_id = ids[avail_idx];
          struct skc_subbuf * const avail    = suballocator->subbufs + avail_id;

          assert(avail->inuse == 0);

          if (avail->size == size_ru) // size matches exactly
            {
              suballocator->total += size_ru;

              // return this id
              *subbuf_id = avail_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,avail_id,size_ru);

              // mark the subbuffer as in use
              avail->inuse += 1;

              assert(avail->inuse == 1);

              // update rem avail count
              suballocator->rem.avail = --avail_rem;

              // replace now inuse id with last avail id
              if ((avail_rem > 0) && (avail_idx != avail_rem))
                {
                  skc_subbuf_id_t     const last_id = ids[avail_rem];
                  struct skc_subbuf * const last    = suballocator->subbufs + last_id;

                  ids[avail_idx] = last_id;   // move id
                  last->idx      = avail_idx; // update idx[]
                }

              assert(suballocator->rem.avail > 0);

              // return origin
              return avail->origin;
            }
          else if ((avail->size > size_ru) && (spare_rem > 0)) // requested is less than available so split it
            {
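              //
              // carve the request off the front of avail: the spare
              // record inherits avail's origin while avail's origin
              // advances and its size shrinks by size_ru
              //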
              suballocator->total += size_ru;

              skc_uint                  spare_idx = suballocator->count - spare_rem;
              skc_subbuf_id_t     const spare_id  = ids[spare_idx];
              struct skc_subbuf * const spare     = suballocator->subbufs + spare_id;

              assert(spare->inuse == 0);

              // simple -- we're popping the top-of-stack of spares
              suballocator->rem.spare -= 1;

              // return id
              *subbuf_id = spare_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,spare_id,size_ru);

              // get prev
              struct skc_subbuf * const prev = avail->prev;

              if (prev != NULL)
                prev->next = spare;

              // init spare
              spare->prev    = prev;
              spare->next    = avail;
              spare->size    = size_ru;
              spare->origin  = avail->origin;
              spare->idx     = SKC_UINT_MAX; // defensive
              spare->inuse  += 1;

              // update curr
              avail->prev    = spare;
              avail->size   -= size_ru;
              avail->origin += size_ru;

              assert(suballocator->rem.avail > 0);

              return spare->origin;
            }
        }

      // uh oh... couldn't find enough memory
      skc_scheduler_wait(scheduler);
    }
}
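
//
// A minimal allocation round trip -- illustrative only: the example
// function and the request size below are assumptions, not part of
// this file's API:
//
#if 0
void
skc_suballocator_subbuf_example(struct skc_suballocator * const suballocator,
                                struct skc_scheduler    * const scheduler)
{
  skc_subbuf_id_t id;
  size_t          size_ru;

  // suballocate 100,000 bytes -- rounded up to the suballocator's alignment
  size_t const origin = skc_suballocator_subbuf_alloc(suballocator,scheduler,
                                                      100000,&id,&size_ru);

  // ... enqueue work against the extent range [origin, origin+size_ru) ...

  // release the subbuffer once the work is known to be complete
  skc_suballocator_subbuf_free(suballocator,id);
}
#endif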

//
// FIXME -- simplify this with a merge-with-prev() primitive
//

void
skc_suballocator_subbuf_free(struct skc_suballocator * const suballocator,
                             skc_subbuf_id_t                 subbuf_id)
{
  // get subbuf for id
  struct skc_subbuf * const subbuf = suballocator->subbufs + subbuf_id;

  assert(subbuf->inuse == 1);

  suballocator->total -= subbuf->size;

  SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,subbuf->size);

  //
  // try to merge subbuf with left and maybe right and then dispose
  //
  struct skc_subbuf * prev;
  struct skc_subbuf * next;
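
  //
  // four cases: merge with both neighbors, with prev only, with next
  // only, or no merge at all -- merged-away records become spares and
  // an unmerged subbuf becomes available
  //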
  if (((prev = subbuf->prev) != NULL) && !prev->inuse)
    {
      next = subbuf->next;

      if ((next != NULL) && !next->inuse)
        {
          subbuf->inuse -= 1;

          assert(next->inuse == 0);

          // increment size
          prev->size += (subbuf->size + next->size);

          struct skc_subbuf * const nextnext = next->next;

          // update next link
          prev->next = nextnext;

          // update prev link
          if (nextnext != NULL)
            nextnext->prev = prev;

          //
          // both subbuf and next are now spare which means we need to
          // move the final available subbuffer into next's old
          // position unless they're the same
          //
          skc_uint const last_idx = --suballocator->rem.avail;
          skc_uint const next_idx = next->idx;

          assert(suballocator->rem.avail > 0);

          if (last_idx != next_idx)
            {
              skc_subbuf_id_t     const last_id = suballocator->ids[last_idx];
              struct skc_subbuf * const last    = suballocator->subbufs + last_id;

              suballocator->ids[next_idx]       = last_id;
              last->idx                         = next_idx;
            }

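          //
          // subbuf and next are both spares now: push their ids onto
          // the spare stack at the back of ids[]
          //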
          skc_subbuf_id_t  const next_id   = (skc_subbuf_id_t)(next - suballocator->subbufs);

          skc_uint         const spare_rem = suballocator->rem.spare + 2;
          skc_uint         const spare_idx = suballocator->count - spare_rem;

          suballocator->rem.spare          = spare_rem;
          suballocator->ids[spare_idx + 0] = subbuf_id;
          suballocator->ids[spare_idx + 1] = next_id;
        }
      else
        {
          prev->size += subbuf->size;
          prev->next  = next;

          if (next != NULL)
            next->prev = prev;

          subbuf->inuse -= 1;

          assert(subbuf->inuse == 0);
          assert(suballocator->rem.avail > 0);

          suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
        }
    }
  //
  // try to merge with right
  //
  else if (((next = subbuf->next) != NULL) && !next->inuse)
    {
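      //
      // next absorbs subbuf by extending leftward: it inherits
      // subbuf's origin and grows by subbuf's size
      //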
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      next->prev     = prev;
      next->origin   = subbuf->origin;
      next->size    += subbuf->size;

      if (prev != NULL)
        prev->next = next;

      // subbuf is now spare
      suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
    }
  else // couldn't merge with a neighbor
    {
      skc_uint avail_idx = suballocator->rem.avail++;

      // subbuf is now available
      subbuf->idx    = avail_idx;
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      suballocator->ids[avail_idx] = subbuf_id;
    }
}

//
//
//

#if 0

//
// At some point there might be a reason to sort the available
// subbuffers into some useful order -- presumably to binary search
// for the closest match or to chip away at the largest available
// subbuffer
//

static
void
skc_suballocator_optimize(struct skc_suballocator * const suballocator)
{
  ;
}

#endif

//
//
//