1 /*
2  * Permission is hereby granted, free of charge, to any person obtaining a
3  * copy of this software and associated documentation files (the "Software"),
4  * to deal in the Software without restriction, including without limitation
5  * on the rights to use, copy, modify, merge, publish, distribute, sub
6  * license, and/or sell copies of the Software, and to permit persons to whom
7  * the Software is furnished to do so, subject to the following conditions:
8  *
9  * The above copyright notice and this permission notice (including the next
10  * paragraph) shall be included in all copies or substantial portions of the
11  * Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * Authors:
22  *      Adam Rak <adam.rak@streamnovation.com>
23  */
24 
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "pipe/p_context.h"
28 #include "util/u_blitter.h"
29 #include "util/u_double_list.h"
30 #include "util/u_transfer.h"
31 #include "util/u_surface.h"
32 #include "util/u_pack_color.h"
33 #include "util/u_memory.h"
34 #include "util/u_inlines.h"
35 #include "util/u_framebuffer.h"
36 #include "r600.h"
37 #include "r600_resource.h"
38 #include "r600_shader.h"
39 #include "r600_pipe.h"
40 #include "r600_formats.h"
41 #include "compute_memory_pool.h"
42 #include "evergreen_compute_internal.h"
43 
create_pool_texture(struct r600_screen * screen,unsigned size_in_dw)44 static struct r600_texture * create_pool_texture(struct r600_screen * screen,
45 		unsigned size_in_dw)
46 {
47 
48 	struct pipe_resource templ;
49 	struct r600_texture * tex;
50 
51 	if (size_in_dw == 0) {
52 		return NULL;
53 	}
54 	memset(&templ, 0, sizeof(templ));
55 	templ.target = PIPE_TEXTURE_1D;
56 	templ.format = PIPE_FORMAT_R32_UINT;
57 	templ.bind = PIPE_BIND_CUSTOM;
58 	templ.usage = PIPE_USAGE_IMMUTABLE;
59 	templ.flags = 0;
60 	templ.width0 = size_in_dw;
61 	templ.height0 = 1;
62 	templ.depth0 = 1;
63 	templ.array_size = 1;
64 
65 	tex = (struct r600_texture *)r600_texture_create(
66 						&screen->screen, &templ);
67 	/* XXX: Propagate this error */
68 	assert(tex && "Out of memory");
69 	tex->is_rat = 1;
70 	return tex;
71 }
72 
73 /**
74  * Creates a new pool
75  */
compute_memory_pool_new(struct r600_screen * rscreen)76 struct compute_memory_pool* compute_memory_pool_new(
77 	struct r600_screen * rscreen)
78 {
79 	struct compute_memory_pool* pool = (struct compute_memory_pool*)
80 				CALLOC(sizeof(struct compute_memory_pool), 1);
81 
82 	COMPUTE_DBG("* compute_memory_pool_new()\n");
83 
84 	pool->screen = rscreen;
85 	return pool;
86 }
87 
compute_memory_pool_init(struct compute_memory_pool * pool,unsigned initial_size_in_dw)88 static void compute_memory_pool_init(struct compute_memory_pool * pool,
89 	unsigned initial_size_in_dw)
90 {
91 
92 	COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %ld\n",
93 		initial_size_in_dw);
94 
95 	/* XXX: pool->shadow is used when the buffer needs to be resized, but
96 	 * resizing does not work at the moment.
97 	 * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
98 	 */
99 	pool->next_id = 1;
100 	pool->size_in_dw = initial_size_in_dw;
101 	pool->bo = (struct r600_resource*)create_pool_texture(pool->screen,
102 							pool->size_in_dw);
103 }
104 
105 /**
106  * Frees all stuff in the pool and the pool struct itself too
107  */
compute_memory_pool_delete(struct compute_memory_pool * pool)108 void compute_memory_pool_delete(struct compute_memory_pool* pool)
109 {
110 	COMPUTE_DBG("* compute_memory_pool_delete()\n");
111 	free(pool->shadow);
112 	if (pool->bo) {
113 		pool->screen->screen.resource_destroy((struct pipe_screen *)
114 			pool->screen, (struct pipe_resource *)pool->bo);
115 	}
116 	free(pool);
117 }
118 
119 /**
120  * Searches for an empty space in the pool, return with the pointer to the
121  * allocatable space in the pool, returns -1 on failure.
122  */
compute_memory_prealloc_chunk(struct compute_memory_pool * pool,int64_t size_in_dw)123 int64_t compute_memory_prealloc_chunk(
124 	struct compute_memory_pool* pool,
125 	int64_t size_in_dw)
126 {
127 	assert(size_in_dw <= pool->size_in_dw);
128 
129 	struct compute_memory_item *item;
130 
131 	int last_end = 0;
132 
133 	COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
134 		size_in_dw);
135 
136 	for (item = pool->item_list; item; item = item->next) {
137 		if (item->start_in_dw > -1) {
138 			if (item->start_in_dw-last_end > size_in_dw) {
139 				return last_end;
140 			}
141 
142 			last_end = item->start_in_dw + item->size_in_dw;
143 			last_end += (1024 - last_end % 1024);
144 		}
145 	}
146 
147 	if (pool->size_in_dw - last_end < size_in_dw) {
148 		return -1;
149 	}
150 
151 	return last_end;
152 }
153 
154 /**
155  *  Search for the chunk where we can link our new chunk after it.
156  */
compute_memory_postalloc_chunk(struct compute_memory_pool * pool,int64_t start_in_dw)157 struct compute_memory_item* compute_memory_postalloc_chunk(
158 	struct compute_memory_pool* pool,
159 	int64_t start_in_dw)
160 {
161 	struct compute_memory_item* item;
162 
163 	COMPUTE_DBG("* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
164 		start_in_dw);
165 
166 	for (item = pool->item_list; item; item = item->next) {
167 		if (item->next) {
168 			if (item->start_in_dw < start_in_dw
169 				&& item->next->start_in_dw > start_in_dw) {
170 				return item;
171 			}
172 		}
173 		else {
174 			/* end of chain */
175 			assert(item->start_in_dw < start_in_dw);
176 			return item;
177 		}
178 	}
179 
180 	assert(0 && "unreachable");
181 	return NULL;
182 }
183 
184 /**
185  * Reallocates pool, conserves data
186  */
compute_memory_grow_pool(struct compute_memory_pool * pool,struct pipe_context * pipe,int new_size_in_dw)187 void compute_memory_grow_pool(struct compute_memory_pool* pool,
188 	struct pipe_context * pipe, int new_size_in_dw)
189 {
190 	COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
191 		new_size_in_dw);
192 
193 	assert(new_size_in_dw >= pool->size_in_dw);
194 
195 	assert(!pool->bo && "Growing the global memory pool is not yet "
196 		"supported.  You will see this message if you are trying to"
197 		"use more than 64 kb of memory");
198 
199 	if (!pool->bo) {
200 		compute_memory_pool_init(pool, 1024 * 16);
201 	} else {
202 		/* XXX: Growing memory pools does not work at the moment.  I think
203 		 * it is because we are using fragment shaders to copy data to
204 		 * the new texture and some of the compute registers are being
205 		 * included in the 3D command stream. */
206 		fprintf(stderr, "Warning: growing the global memory pool to"
207 				"more than 64 kb is not yet supported\n");
208 		new_size_in_dw += 1024 - (new_size_in_dw % 1024);
209 
210 		COMPUTE_DBG("  Aligned size = %d\n", new_size_in_dw);
211 
212 		compute_memory_shadow(pool, pipe, 1);
213 		pool->shadow = (uint32_t*)realloc(pool->shadow, new_size_in_dw*4);
214 		pool->size_in_dw = new_size_in_dw;
215 		pool->screen->screen.resource_destroy(
216 			(struct pipe_screen *)pool->screen,
217 			(struct pipe_resource *)pool->bo);
218 		pool->bo = (struct r600_resource*)create_pool_texture(
219 							pool->screen,
220 							pool->size_in_dw);
221 		compute_memory_shadow(pool, pipe, 0);
222 	}
223 }
224 
225 /**
226  * Copy pool from device to host, or host to device.
227  */
compute_memory_shadow(struct compute_memory_pool * pool,struct pipe_context * pipe,int device_to_host)228 void compute_memory_shadow(struct compute_memory_pool* pool,
229 	struct pipe_context * pipe, int device_to_host)
230 {
231 	struct compute_memory_item chunk;
232 
233 	COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
234 		device_to_host);
235 
236 	chunk.id = 0;
237 	chunk.start_in_dw = 0;
238 	chunk.size_in_dw = pool->size_in_dw;
239 	chunk.prev = chunk.next = NULL;
240 	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
241 				pool->shadow, 0, pool->size_in_dw*4);
242 }
243 
244 /**
245  * Allocates pending allocations in the pool
246  */
compute_memory_finalize_pending(struct compute_memory_pool * pool,struct pipe_context * pipe)247 void compute_memory_finalize_pending(struct compute_memory_pool* pool,
248 	struct pipe_context * pipe)
249 {
250 	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
251 	struct compute_memory_item *item, *next;
252 
253 	int64_t allocated = 0;
254 	int64_t unallocated = 0;
255 
256 	COMPUTE_DBG("* compute_memory_finalize_pending()\n");
257 
258 	for (item = pool->item_list; item; item = item->next) {
259 		COMPUTE_DBG("list: %i %p\n", item->start_in_dw, item->next);
260 	}
261 
262 	for (item = pool->item_list; item; item = next) {
263 		next = item->next;
264 
265 
266 		if (item->start_in_dw == -1) {
267 			if (end_p) {
268 				end_p->next = item;
269 			}
270 			else {
271 				pending_list = item;
272 			}
273 
274 			if (item->prev) {
275 				item->prev->next = next;
276 			}
277 			else {
278 				pool->item_list = next;
279 			}
280 
281 			if (next) {
282 				next->prev = item->prev;
283 			}
284 
285 			item->prev = end_p;
286 			item->next = NULL;
287 			end_p = item;
288 
289 			unallocated += item->size_in_dw+1024;
290 		}
291 		else {
292 			allocated += item->size_in_dw;
293 		}
294 	}
295 
296 	if (pool->size_in_dw < allocated+unallocated) {
297 		compute_memory_grow_pool(pool, pipe, allocated+unallocated);
298 	}
299 
300 	for (item = pending_list; item; item = next) {
301 		next = item->next;
302 
303 		int64_t start_in_dw;
304 
305 		while ((start_in_dw=compute_memory_prealloc_chunk(pool,
306 						item->size_in_dw)) == -1) {
307 			int64_t need = item->size_in_dw+2048 -
308 						(pool->size_in_dw - allocated);
309 
310 			need += 1024 - (need % 1024);
311 
312 			if (need > 0) {
313 				compute_memory_grow_pool(pool,
314 						pipe,
315 						pool->size_in_dw + need);
316 			}
317 			else {
318 				need = pool->size_in_dw / 10;
319 				need += 1024 - (need % 1024);
320 				compute_memory_grow_pool(pool,
321 						pipe,
322 						pool->size_in_dw + need);
323 			}
324 		}
325 
326 		item->start_in_dw = start_in_dw;
327 		item->next = NULL;
328 		item->prev = NULL;
329 
330 		if (pool->item_list) {
331 			struct compute_memory_item *pos;
332 
333 			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
334 			item->prev = pos;
335 			item->next = pos->next;
336 			pos->next = item;
337 
338 			if (item->next) {
339 				item->next->prev = item;
340 			}
341 		}
342 		else {
343 			pool->item_list = item;
344 		}
345 
346 		allocated += item->size_in_dw;
347 	}
348 }
349 
350 
compute_memory_free(struct compute_memory_pool * pool,int64_t id)351 void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
352 {
353 	struct compute_memory_item *item, *next;
354 
355 	COMPUTE_DBG("* compute_memory_free() id + %ld \n", id);
356 
357 	for (item = pool->item_list; item; item = next) {
358 		next = item->next;
359 
360 		if (item->id == id) {
361 			if (item->prev) {
362 				item->prev->next = item->next;
363 			}
364 			else {
365 				pool->item_list = item->next;
366 			}
367 
368 			if (item->next) {
369 				item->next->prev = item->prev;
370 			}
371 
372 			free(item);
373 
374 			return;
375 		}
376 	}
377 
378 	fprintf(stderr, "Internal error, invalid id %ld "
379 		"for compute_memory_free\n", id);
380 
381 	assert(0 && "error");
382 }
383 
384 /**
385  * Creates pending allocations
386  */
compute_memory_alloc(struct compute_memory_pool * pool,int64_t size_in_dw)387 struct compute_memory_item* compute_memory_alloc(
388 	struct compute_memory_pool* pool,
389 	int64_t size_in_dw)
390 {
391 	struct compute_memory_item *new_item;
392 
393 	COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %ld\n", size_in_dw);
394 
395 	new_item = (struct compute_memory_item *)
396 				CALLOC(sizeof(struct compute_memory_item), 1);
397 	new_item->size_in_dw = size_in_dw;
398 	new_item->start_in_dw = -1; /* mark pending */
399 	new_item->id = pool->next_id++;
400 	new_item->pool = pool;
401 
402 	struct compute_memory_item *last_item;
403 
404 	if (pool->item_list) {
405 		for (last_item = pool->item_list; last_item->next;
406 						last_item = last_item->next);
407 
408 		last_item->next = new_item;
409 		new_item->prev = last_item;
410 	}
411 	else {
412 		pool->item_list = new_item;
413 	}
414 
415 	return new_item;
416 }
417 
418 /**
419  * Transfer data host<->device, offset and size is in bytes
420  */
compute_memory_transfer(struct compute_memory_pool * pool,struct pipe_context * pipe,int device_to_host,struct compute_memory_item * chunk,void * data,int offset_in_chunk,int size)421 void compute_memory_transfer(
422 	struct compute_memory_pool* pool,
423 	struct pipe_context * pipe,
424 	int device_to_host,
425 	struct compute_memory_item* chunk,
426 	void* data,
427 	int offset_in_chunk,
428 	int size)
429 {
430 	int64_t aligned_size = pool->size_in_dw;
431 	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
432 	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;
433 
434 	struct pipe_transfer *xfer;
435 	uint32_t *map;
436 
437 	assert(gart);
438 
439 	COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
440 		"offset_in_chunk = %d, size = %d\n", device_to_host,
441 		offset_in_chunk, size);
442 
443 	if (device_to_host)
444 	{
445 		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
446 			&(struct pipe_box) { .width = aligned_size,
447 			.height = 1, .depth = 1 });
448 		assert(xfer);
449 		map = pipe->transfer_map(pipe, xfer);
450 		assert(map);
451 		memcpy(data, map + internal_offset, size);
452 		pipe->transfer_unmap(pipe, xfer);
453 		pipe->transfer_destroy(pipe, xfer);
454 	} else {
455 		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
456 			&(struct pipe_box) { .width = aligned_size,
457 			.height = 1, .depth = 1 });
458 		assert(xfer);
459 		map = pipe->transfer_map(pipe, xfer);
460 		assert(map);
461 		memcpy(map + internal_offset, data, size);
462 		pipe->transfer_unmap(pipe, xfer);
463 		pipe->transfer_destroy(pipe, xfer);
464 	}
465 }
466 
/**
 * Transfer data between chunk<->data, it is for VRAM<->GART transfers.
 *
 * Not implemented yet: the function accepts its arguments and does nothing.
 *
 * @param pool             pool that owns the chunk
 * @param chunk_to_data    direction flag
 * @param chunk            item inside the pool
 * @param data             resource on the other side of the transfer
 * @param offset_in_chunk  offset inside the chunk
 * @param offset_in_data   offset inside data
 * @param size             amount of data to transfer
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	/* Nothing is used until the DMA path exists */
	(void)pool;
	(void)chunk_to_data;
	(void)chunk;
	(void)data;
	(void)offset_in_chunk;
	(void)offset_in_data;
	(void)size;

	///TODO: DMA
}
481