1 /*
2 * Permission is hereby granted, free of charge, to any person obtaining a
3 * copy of this software and associated documentation files (the "Software"),
4 * to deal in the Software without restriction, including without limitation
5 * on the rights to use, copy, modify, merge, publish, distribute, sub
6 * license, and/or sell copies of the Software, and to permit persons to whom
7 * the Software is furnished to do so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice (including the next
10 * paragraph) shall be included in all copies or substantial portions of the
11 * Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * Authors:
22 * Adam Rak <adam.rak@streamnovation.com>
23 */
24
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "pipe/p_context.h"
28 #include "util/u_blitter.h"
29 #include "util/u_double_list.h"
30 #include "util/u_transfer.h"
31 #include "util/u_surface.h"
32 #include "util/u_pack_color.h"
33 #include "util/u_memory.h"
34 #include "util/u_inlines.h"
35 #include "util/u_framebuffer.h"
36 #include "r600.h"
37 #include "r600_resource.h"
38 #include "r600_shader.h"
39 #include "r600_pipe.h"
40 #include "r600_formats.h"
41 #include "compute_memory_pool.h"
42 #include "evergreen_compute_internal.h"
43
create_pool_texture(struct r600_screen * screen,unsigned size_in_dw)44 static struct r600_texture * create_pool_texture(struct r600_screen * screen,
45 unsigned size_in_dw)
46 {
47
48 struct pipe_resource templ;
49 struct r600_texture * tex;
50
51 if (size_in_dw == 0) {
52 return NULL;
53 }
54 memset(&templ, 0, sizeof(templ));
55 templ.target = PIPE_TEXTURE_1D;
56 templ.format = PIPE_FORMAT_R32_UINT;
57 templ.bind = PIPE_BIND_CUSTOM;
58 templ.usage = PIPE_USAGE_IMMUTABLE;
59 templ.flags = 0;
60 templ.width0 = size_in_dw;
61 templ.height0 = 1;
62 templ.depth0 = 1;
63 templ.array_size = 1;
64
65 tex = (struct r600_texture *)r600_texture_create(
66 &screen->screen, &templ);
67 /* XXX: Propagate this error */
68 assert(tex && "Out of memory");
69 tex->is_rat = 1;
70 return tex;
71 }
72
73 /**
74 * Creates a new pool
75 */
compute_memory_pool_new(struct r600_screen * rscreen)76 struct compute_memory_pool* compute_memory_pool_new(
77 struct r600_screen * rscreen)
78 {
79 struct compute_memory_pool* pool = (struct compute_memory_pool*)
80 CALLOC(sizeof(struct compute_memory_pool), 1);
81
82 COMPUTE_DBG("* compute_memory_pool_new()\n");
83
84 pool->screen = rscreen;
85 return pool;
86 }
87
compute_memory_pool_init(struct compute_memory_pool * pool,unsigned initial_size_in_dw)88 static void compute_memory_pool_init(struct compute_memory_pool * pool,
89 unsigned initial_size_in_dw)
90 {
91
92 COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %ld\n",
93 initial_size_in_dw);
94
95 /* XXX: pool->shadow is used when the buffer needs to be resized, but
96 * resizing does not work at the moment.
97 * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
98 */
99 pool->next_id = 1;
100 pool->size_in_dw = initial_size_in_dw;
101 pool->bo = (struct r600_resource*)create_pool_texture(pool->screen,
102 pool->size_in_dw);
103 }
104
105 /**
106 * Frees all stuff in the pool and the pool struct itself too
107 */
compute_memory_pool_delete(struct compute_memory_pool * pool)108 void compute_memory_pool_delete(struct compute_memory_pool* pool)
109 {
110 COMPUTE_DBG("* compute_memory_pool_delete()\n");
111 free(pool->shadow);
112 if (pool->bo) {
113 pool->screen->screen.resource_destroy((struct pipe_screen *)
114 pool->screen, (struct pipe_resource *)pool->bo);
115 }
116 free(pool);
117 }
118
119 /**
120 * Searches for an empty space in the pool, return with the pointer to the
121 * allocatable space in the pool, returns -1 on failure.
122 */
compute_memory_prealloc_chunk(struct compute_memory_pool * pool,int64_t size_in_dw)123 int64_t compute_memory_prealloc_chunk(
124 struct compute_memory_pool* pool,
125 int64_t size_in_dw)
126 {
127 assert(size_in_dw <= pool->size_in_dw);
128
129 struct compute_memory_item *item;
130
131 int last_end = 0;
132
133 COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
134 size_in_dw);
135
136 for (item = pool->item_list; item; item = item->next) {
137 if (item->start_in_dw > -1) {
138 if (item->start_in_dw-last_end > size_in_dw) {
139 return last_end;
140 }
141
142 last_end = item->start_in_dw + item->size_in_dw;
143 last_end += (1024 - last_end % 1024);
144 }
145 }
146
147 if (pool->size_in_dw - last_end < size_in_dw) {
148 return -1;
149 }
150
151 return last_end;
152 }
153
154 /**
155 * Search for the chunk where we can link our new chunk after it.
156 */
compute_memory_postalloc_chunk(struct compute_memory_pool * pool,int64_t start_in_dw)157 struct compute_memory_item* compute_memory_postalloc_chunk(
158 struct compute_memory_pool* pool,
159 int64_t start_in_dw)
160 {
161 struct compute_memory_item* item;
162
163 COMPUTE_DBG("* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
164 start_in_dw);
165
166 for (item = pool->item_list; item; item = item->next) {
167 if (item->next) {
168 if (item->start_in_dw < start_in_dw
169 && item->next->start_in_dw > start_in_dw) {
170 return item;
171 }
172 }
173 else {
174 /* end of chain */
175 assert(item->start_in_dw < start_in_dw);
176 return item;
177 }
178 }
179
180 assert(0 && "unreachable");
181 return NULL;
182 }
183
184 /**
185 * Reallocates pool, conserves data
186 */
compute_memory_grow_pool(struct compute_memory_pool * pool,struct pipe_context * pipe,int new_size_in_dw)187 void compute_memory_grow_pool(struct compute_memory_pool* pool,
188 struct pipe_context * pipe, int new_size_in_dw)
189 {
190 COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
191 new_size_in_dw);
192
193 assert(new_size_in_dw >= pool->size_in_dw);
194
195 assert(!pool->bo && "Growing the global memory pool is not yet "
196 "supported. You will see this message if you are trying to"
197 "use more than 64 kb of memory");
198
199 if (!pool->bo) {
200 compute_memory_pool_init(pool, 1024 * 16);
201 } else {
202 /* XXX: Growing memory pools does not work at the moment. I think
203 * it is because we are using fragment shaders to copy data to
204 * the new texture and some of the compute registers are being
205 * included in the 3D command stream. */
206 fprintf(stderr, "Warning: growing the global memory pool to"
207 "more than 64 kb is not yet supported\n");
208 new_size_in_dw += 1024 - (new_size_in_dw % 1024);
209
210 COMPUTE_DBG(" Aligned size = %d\n", new_size_in_dw);
211
212 compute_memory_shadow(pool, pipe, 1);
213 pool->shadow = (uint32_t*)realloc(pool->shadow, new_size_in_dw*4);
214 pool->size_in_dw = new_size_in_dw;
215 pool->screen->screen.resource_destroy(
216 (struct pipe_screen *)pool->screen,
217 (struct pipe_resource *)pool->bo);
218 pool->bo = (struct r600_resource*)create_pool_texture(
219 pool->screen,
220 pool->size_in_dw);
221 compute_memory_shadow(pool, pipe, 0);
222 }
223 }
224
225 /**
226 * Copy pool from device to host, or host to device.
227 */
compute_memory_shadow(struct compute_memory_pool * pool,struct pipe_context * pipe,int device_to_host)228 void compute_memory_shadow(struct compute_memory_pool* pool,
229 struct pipe_context * pipe, int device_to_host)
230 {
231 struct compute_memory_item chunk;
232
233 COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
234 device_to_host);
235
236 chunk.id = 0;
237 chunk.start_in_dw = 0;
238 chunk.size_in_dw = pool->size_in_dw;
239 chunk.prev = chunk.next = NULL;
240 compute_memory_transfer(pool, pipe, device_to_host, &chunk,
241 pool->shadow, 0, pool->size_in_dw*4);
242 }
243
244 /**
245 * Allocates pending allocations in the pool
246 */
compute_memory_finalize_pending(struct compute_memory_pool * pool,struct pipe_context * pipe)247 void compute_memory_finalize_pending(struct compute_memory_pool* pool,
248 struct pipe_context * pipe)
249 {
250 struct compute_memory_item *pending_list = NULL, *end_p = NULL;
251 struct compute_memory_item *item, *next;
252
253 int64_t allocated = 0;
254 int64_t unallocated = 0;
255
256 COMPUTE_DBG("* compute_memory_finalize_pending()\n");
257
258 for (item = pool->item_list; item; item = item->next) {
259 COMPUTE_DBG("list: %i %p\n", item->start_in_dw, item->next);
260 }
261
262 for (item = pool->item_list; item; item = next) {
263 next = item->next;
264
265
266 if (item->start_in_dw == -1) {
267 if (end_p) {
268 end_p->next = item;
269 }
270 else {
271 pending_list = item;
272 }
273
274 if (item->prev) {
275 item->prev->next = next;
276 }
277 else {
278 pool->item_list = next;
279 }
280
281 if (next) {
282 next->prev = item->prev;
283 }
284
285 item->prev = end_p;
286 item->next = NULL;
287 end_p = item;
288
289 unallocated += item->size_in_dw+1024;
290 }
291 else {
292 allocated += item->size_in_dw;
293 }
294 }
295
296 if (pool->size_in_dw < allocated+unallocated) {
297 compute_memory_grow_pool(pool, pipe, allocated+unallocated);
298 }
299
300 for (item = pending_list; item; item = next) {
301 next = item->next;
302
303 int64_t start_in_dw;
304
305 while ((start_in_dw=compute_memory_prealloc_chunk(pool,
306 item->size_in_dw)) == -1) {
307 int64_t need = item->size_in_dw+2048 -
308 (pool->size_in_dw - allocated);
309
310 need += 1024 - (need % 1024);
311
312 if (need > 0) {
313 compute_memory_grow_pool(pool,
314 pipe,
315 pool->size_in_dw + need);
316 }
317 else {
318 need = pool->size_in_dw / 10;
319 need += 1024 - (need % 1024);
320 compute_memory_grow_pool(pool,
321 pipe,
322 pool->size_in_dw + need);
323 }
324 }
325
326 item->start_in_dw = start_in_dw;
327 item->next = NULL;
328 item->prev = NULL;
329
330 if (pool->item_list) {
331 struct compute_memory_item *pos;
332
333 pos = compute_memory_postalloc_chunk(pool, start_in_dw);
334 item->prev = pos;
335 item->next = pos->next;
336 pos->next = item;
337
338 if (item->next) {
339 item->next->prev = item;
340 }
341 }
342 else {
343 pool->item_list = item;
344 }
345
346 allocated += item->size_in_dw;
347 }
348 }
349
350
compute_memory_free(struct compute_memory_pool * pool,int64_t id)351 void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
352 {
353 struct compute_memory_item *item, *next;
354
355 COMPUTE_DBG("* compute_memory_free() id + %ld \n", id);
356
357 for (item = pool->item_list; item; item = next) {
358 next = item->next;
359
360 if (item->id == id) {
361 if (item->prev) {
362 item->prev->next = item->next;
363 }
364 else {
365 pool->item_list = item->next;
366 }
367
368 if (item->next) {
369 item->next->prev = item->prev;
370 }
371
372 free(item);
373
374 return;
375 }
376 }
377
378 fprintf(stderr, "Internal error, invalid id %ld "
379 "for compute_memory_free\n", id);
380
381 assert(0 && "error");
382 }
383
384 /**
385 * Creates pending allocations
386 */
compute_memory_alloc(struct compute_memory_pool * pool,int64_t size_in_dw)387 struct compute_memory_item* compute_memory_alloc(
388 struct compute_memory_pool* pool,
389 int64_t size_in_dw)
390 {
391 struct compute_memory_item *new_item;
392
393 COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %ld\n", size_in_dw);
394
395 new_item = (struct compute_memory_item *)
396 CALLOC(sizeof(struct compute_memory_item), 1);
397 new_item->size_in_dw = size_in_dw;
398 new_item->start_in_dw = -1; /* mark pending */
399 new_item->id = pool->next_id++;
400 new_item->pool = pool;
401
402 struct compute_memory_item *last_item;
403
404 if (pool->item_list) {
405 for (last_item = pool->item_list; last_item->next;
406 last_item = last_item->next);
407
408 last_item->next = new_item;
409 new_item->prev = last_item;
410 }
411 else {
412 pool->item_list = new_item;
413 }
414
415 return new_item;
416 }
417
418 /**
419 * Transfer data host<->device, offset and size is in bytes
420 */
compute_memory_transfer(struct compute_memory_pool * pool,struct pipe_context * pipe,int device_to_host,struct compute_memory_item * chunk,void * data,int offset_in_chunk,int size)421 void compute_memory_transfer(
422 struct compute_memory_pool* pool,
423 struct pipe_context * pipe,
424 int device_to_host,
425 struct compute_memory_item* chunk,
426 void* data,
427 int offset_in_chunk,
428 int size)
429 {
430 int64_t aligned_size = pool->size_in_dw;
431 struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
432 int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;
433
434 struct pipe_transfer *xfer;
435 uint32_t *map;
436
437 assert(gart);
438
439 COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
440 "offset_in_chunk = %d, size = %d\n", device_to_host,
441 offset_in_chunk, size);
442
443 if (device_to_host)
444 {
445 xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
446 &(struct pipe_box) { .width = aligned_size,
447 .height = 1, .depth = 1 });
448 assert(xfer);
449 map = pipe->transfer_map(pipe, xfer);
450 assert(map);
451 memcpy(data, map + internal_offset, size);
452 pipe->transfer_unmap(pipe, xfer);
453 pipe->transfer_destroy(pipe, xfer);
454 } else {
455 xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
456 &(struct pipe_box) { .width = aligned_size,
457 .height = 1, .depth = 1 });
458 assert(xfer);
459 map = pipe->transfer_map(pipe, xfer);
460 assert(map);
461 memcpy(map + internal_offset, data, size);
462 pipe->transfer_unmap(pipe, xfer);
463 pipe->transfer_destroy(pipe, xfer);
464 }
465 }
466
/**
 * Transfer data between chunk<->data, it is for VRAM<->GART transfers.
 *
 * Not implemented yet: the function currently does nothing with its
 * arguments.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	/* Placeholder: mark every parameter as intentionally unused. */
	(void)pool;
	(void)chunk_to_data;
	(void)chunk;
	(void)data;
	(void)offset_in_chunk;
	(void)offset_in_data;
	(void)size;

	///TODO: DMA
}
481