/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/* Originally a fake version of the buffer manager, written so that we
 * could prototype the changes in a driver fairly quickly; it has since
 * been fleshed out into a fully functional interim solution.
 *
 * Basically wraps the old-style memory management in the new
 * programming interface, but is more expressive and avoids many of
 * the bugs in the old texture manager.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <strings.h>
#include <xf86drm.h>
#include <pthread.h>
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "drm.h"
#include "i915_drm.h"
#include "mm.h"
#include "libdrm_macros.h"
#include "libdrm_lists.h"

#define DBG(...) do {					\
	if (bufmgr_fake->bufmgr.debug)			\
		drmMsg(__VA_ARGS__);			\
} while (0)

/* Internal flags:
 */
#define BM_NO_BACKING_STORE			0x00000001
#define BM_NO_FENCE_SUBDATA			0x00000002
#define BM_PINNED				0x00000004

/* Wrapper around mm.c's mem_block, which understands that you must
 * wait for fences to expire before memory can be freed.  This is
 * specific to our use of memcpy for uploads - an upload that was
 * processed through the command queue wouldn't need to care about
 * fences.
 */
#define MAX_RELOCS 4096

struct fake_buffer_reloc {
	/** Buffer object that the relocation points at. */
	drm_intel_bo *target_buf;
	/** Offset of the relocation entry within reloc_buf. */
	uint32_t offset;
	/**
	 * Cached value of the offset when we last performed this relocation.
	 */
	uint32_t last_target_offset;
	/** Value added to target_buf's offset to get the relocation entry. */
	uint32_t delta;
	/** Cache domains the target buffer is read into. */
	uint32_t read_domains;
	/** Cache domain the target buffer will have dirty cachelines in. */
	uint32_t write_domain;
};

struct block {
	struct block *next, *prev;
	struct mem_block *mem;	/* BM_MEM_AGP */

	/**
	 * Marks that the block is currently in the aperture and has yet to be
	 * fenced.
	 */
	unsigned on_hardware:1;
	/**
	 * Marks that the block is currently fenced (being used by rendering)
	 * and can't be freed until @fence is passed.
	 */
	unsigned fenced:1;

	/** Fence cookie for the block. */
	unsigned fence;		/* Split to read_fence, write_fence */

	drm_intel_bo *bo;
	void *virtual;
};

typedef struct _bufmgr_fake {
	drm_intel_bufmgr bufmgr;

	pthread_mutex_t lock;

	unsigned long low_offset;
	unsigned long size;
	void *virtual;

	struct mem_block *heap;

	unsigned buf_nr;	/* for generating ids */

	/**
	 * List of blocks which are currently in the GART but haven't been
	 * fenced yet.
	 */
	struct block on_hardware;
	/**
	 * List of blocks which are in the GART and have an active fence on
	 * them.
	 */
	struct block fenced;
	/**
	 * List of blocks which have an expired fence and are ready to be
	 * evicted.
	 */
	struct block lru;

	unsigned int last_fence;

	unsigned fail:1;
	unsigned need_fence:1;
	int thrashing;

	/**
	 * Driver callback to emit a fence, returning the cookie.
	 *
	 * This allows the driver to hook in a replacement for the DRM usage in
	 * bufmgr_fake.
	 *
	 * Currently, this also requires that a write flush be emitted before
	 * emitting the fence, but this should change.
	 */
	unsigned int (*fence_emit) (void *private);
	/** Driver callback to wait for a fence cookie to have passed. */
	void (*fence_wait) (unsigned int fence, void *private);
	void *fence_priv;

	/**
	 * Driver callback to execute a buffer.
	 *
	 * This allows the driver to hook in a replacement for the DRM usage in
	 * bufmgr_fake.
	 */
	int (*exec) (drm_intel_bo *bo, unsigned int used, void *priv);
	void *exec_priv;

	/** Driver-supplied argument to driver callbacks */
	void *driver_priv;
	/**
	 * Pointer to kernel-updated sarea data for the last completed user irq
	 */
	volatile int *last_dispatch;

	int fd;

	int debug;

	int performed_rendering;
} drm_intel_bufmgr_fake;

typedef struct _drm_intel_bo_fake {
	drm_intel_bo bo;

	unsigned id;		/* debug only */
	const char *name;

	unsigned dirty:1;
	/**
	 * Has the card written to this buffer?  If so, we may need to copy
	 * it back.
	 */
	unsigned card_dirty:1;
	unsigned int refcount;
	/* Flags may consist of any of the DRM_BO flags, plus
	 * BM_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the
	 * first two driver private flags.
	 */
	uint64_t flags;
	/** Cache domains the target buffer is read into. */
	uint32_t read_domains;
	/** Cache domain the target buffer will have dirty cachelines in. */
	uint32_t write_domain;

	unsigned int alignment;
	int is_static, validated;
	unsigned int map_count;

	/** relocation list */
	struct fake_buffer_reloc *relocs;
	int nr_relocs;
	/**
	 * Total size of the target_bos of this buffer.
	 *
	 * Used for estimation in check_aperture.
	 */
	unsigned int child_size;

	struct block *block;
	void *backing_store;
	void (*invalidate_cb) (drm_intel_bo *bo, void *ptr);
	void *invalidate_ptr;
} drm_intel_bo_fake;

static int clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake,
			unsigned int fence_cookie);

#define MAXFENCE 0x7fffffff

static int
FENCE_LTE(unsigned a, unsigned b)
{
	if (a == b)
		return 1;

	if (a < b && b - a < (1 << 24))
		return 1;

	if (a > b && MAXFENCE - a + b < (1 << 24))
		return 1;

	return 0;
}
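
/* To illustrate the wraparound handling above: fence cookies live in
 * [1, MAXFENCE] and are compared within a window of 2^24, so a cookie
 * just below MAXFENCE is still "<=" a small cookie that has wrapped.
 * A minimal sketch (illustration only, not compiled into the library):
 */
#if 0
	assert(FENCE_LTE(5, 5));		/* equal cookies */
	assert(FENCE_LTE(5, 10));		/* normal ordering */
	assert(!FENCE_LTE(10, 5));		/* a is newer than b */
	assert(FENCE_LTE(MAXFENCE - 2, 3));	/* b wrapped past MAXFENCE */
#endif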

void
drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr *bufmgr,
					 unsigned int (*emit) (void *priv),
					 void (*wait) (unsigned int fence,
						       void *priv),
					 void *priv)
{
	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;

	bufmgr_fake->fence_emit = emit;
	bufmgr_fake->fence_wait = wait;
	bufmgr_fake->fence_priv = priv;
}
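
/* A driver can take over fence handling entirely through this hook.  A
 * minimal sketch of a caller (the my_* names are hypothetical driver
 * functions, not part of this library):
 */
#if 0
static unsigned int my_fence_emit(void *priv)
{
	struct my_driver *drv = priv;

	/* Must flush writes before emitting the fence; returns the cookie. */
	return my_hw_emit_flush_and_fence(drv);
}

static void my_fence_wait(unsigned int fence, void *priv)
{
	struct my_driver *drv = priv;

	my_hw_wait_fence(drv, fence);
}

	/* ...during setup, after drm_intel_bufmgr_fake_init(): */
	drm_intel_bufmgr_fake_set_fence_callback(bufmgr, my_fence_emit,
						 my_fence_wait, drv);
#endif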

static unsigned int
_fence_emit_internal(drm_intel_bufmgr_fake *bufmgr_fake)
{
	struct drm_i915_irq_emit ie;
	int ret, seq = 1;

	if (bufmgr_fake->fence_emit != NULL) {
		seq = bufmgr_fake->fence_emit(bufmgr_fake->fence_priv);
		return seq;
	}

	ie.irq_seq = &seq;
	ret = drmCommandWriteRead(bufmgr_fake->fd, DRM_I915_IRQ_EMIT,
				  &ie, sizeof(ie));
	if (ret) {
		drmMsg("%s: drm_i915_irq_emit: %d\n", __func__, ret);
		abort();
	}

	DBG("emit 0x%08x\n", seq);
	return seq;
}

static void
_fence_wait_internal(drm_intel_bufmgr_fake *bufmgr_fake, int seq)
{
	struct drm_i915_irq_wait iw;
	int hw_seq, busy_count = 0;
	int ret;
	int kernel_lied;

	if (bufmgr_fake->fence_wait != NULL) {
		bufmgr_fake->fence_wait(seq, bufmgr_fake->fence_priv);
		clear_fenced(bufmgr_fake, seq);
		return;
	}

	iw.irq_seq = seq;

	DBG("wait 0x%08x\n", iw.irq_seq);

	/* The kernel IRQ_WAIT implementation is all sorts of broken.
	 * 1) It returns 1 to 0x7fffffff instead of using the full 32-bit
	 *    unsigned range.
	 * 2) It returns 0 if hw_seq >= seq, not seq - hw_seq < 0 on the 32-bit
	 *    signed range.
	 * 3) It waits if seq < hw_seq, not seq - hw_seq > 0 on the 32-bit
	 *    signed range.
	 * 4) It returns -EBUSY after 3 seconds even if the hardware is still
	 *    successfully chewing through buffers.
	 *
	 * Assume that in userland we treat sequence numbers as ints, which
	 * makes some of the comparisons convenient, since the sequence
	 * numbers are all positive signed integers.
	 *
	 * From this we get several cases we need to handle.  Here's a timeline.
	 * 0x2   0x7                                    0x7ffffff8   0x7ffffffd
	 *   |    |                                             |    |
	 * ------------------------------------------------------------
	 *
	 * A) Normal wait for hw to catch up
	 * hw_seq seq
	 *   |    |
	 * ------------------------------------------------------------
	 * seq - hw_seq = 5.  If we call IRQ_WAIT, it will wait for hw to
	 * catch up.
	 *
	 * B) Normal wait for a sequence number that's already passed.
	 * seq    hw_seq
	 *   |    |
	 * ------------------------------------------------------------
	 * seq - hw_seq = -5.  If we call IRQ_WAIT, it returns 0 quickly.
	 *
	 * C) Hardware has already wrapped around ahead of us
	 * hw_seq                                                    seq
	 *   |                                                       |
	 * ------------------------------------------------------------
	 * seq - hw_seq = 0x80000000 - 5.  If we called IRQ_WAIT, it would wait
	 * for hw_seq >= seq, which may never occur.  Thus, we want to catch
	 * this in userland and return 0.
	 *
	 * D) We've wrapped around ahead of the hardware.
	 * seq                                                      hw_seq
	 *   |                                                       |
	 * ------------------------------------------------------------
	 * seq - hw_seq = -(0x80000000 - 5).  If we called IRQ_WAIT, it would
	 * return 0 quickly because hw_seq >= seq, even though the hardware
	 * isn't caught up. Thus, we need to catch this early return in
	 * userland and bother the kernel until the hardware really does
	 * catch up.
	 *
	 * E) Hardware might wrap after we test in userland.
	 *                                                  hw_seq  seq
	 *                                                      |    |
	 * ------------------------------------------------------------
	 * seq - hw_seq = 5.  If we call IRQ_WAIT, it will likely see seq >=
	 * hw_seq and wait.  However, suppose hw_seq wraps before we make it
	 * into the kernel.  The kernel sees hw_seq >= seq and waits for 3
	 * seconds then returns -EBUSY.  This is case C).  We should catch
	 * this and then return successfully.
	 *
	 * F) Hardware might take a long time on a buffer.
	 * hw_seq seq
	 *   |    |
	 * -------------------------------------------------------------------
	 * seq - hw_seq = 5.  If we call IRQ_WAIT, if sequence 2 through 5
	 * take too long, it will return -EBUSY.  Batchbuffers in the
	 * gltestperf demo were seen to take up to 7 seconds.  We should
	 * catch early -EBUSY return and keep trying.
	 */

	do {
		/* Keep a copy of last_dispatch so that if the wait -EBUSYs
		 * because the hardware didn't catch up in 3 seconds, we can
		 * see if it at least made progress and retry.
		 */
		hw_seq = *bufmgr_fake->last_dispatch;

		/* Catch case C */
		if (seq - hw_seq > 0x40000000)
			return;

		ret = drmCommandWrite(bufmgr_fake->fd, DRM_I915_IRQ_WAIT,
				      &iw, sizeof(iw));
		/* Catch case D */
		kernel_lied = (ret == 0) && (seq - *bufmgr_fake->last_dispatch <
					     -0x40000000);

		/* Catch case E */
		if (ret == -EBUSY
		    && (seq - *bufmgr_fake->last_dispatch > 0x40000000))
			ret = 0;

		/* Catch case F: Allow up to 15 seconds chewing on one buffer. */
		if ((ret == -EBUSY) && (hw_seq != *bufmgr_fake->last_dispatch))
			busy_count = 0;
		else
			busy_count++;
	} while (kernel_lied || ret == -EAGAIN || ret == -EINTR ||
		 (ret == -EBUSY && busy_count < 5));

	if (ret != 0) {
		drmMsg("%s:%d: Error waiting for fence: %s.\n", __FILE__,
		       __LINE__, strerror(-ret));
		abort();
	}
	clear_fenced(bufmgr_fake, seq);
}

static int
_fence_test(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
{
	/* Slight problem with wrap-around:
	 */
	return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence);
}

/**
 * Allocate a memory manager block for the buffer.
 */
static int
alloc_block(drm_intel_bo *bo)
{
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	struct block *block = (struct block *)calloc(1, sizeof(*block));
	unsigned int align_log2 = ffs(bo_fake->alignment) - 1;
	unsigned int sz;

	if (!block)
		return 0;

	sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1);

	block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0);
	if (!block->mem) {
		free(block);
		return 0;
	}

	DRMINITLISTHEAD(block);

	/* Insert at head or at tail??? */
	DRMLISTADDTAIL(block, &bufmgr_fake->lru);

	block->virtual = (uint8_t *) bufmgr_fake->virtual +
	    block->mem->ofs - bufmgr_fake->low_offset;
	block->bo = bo;

	bo_fake->block = block;

	return 1;
}
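
/* The size rounding above is the usual power-of-two align-up idiom:
 * with alignment 4096, a 5000-byte BO becomes (5000 + 4095) & ~4095 =
 * 8192.  A minimal sketch (illustration only, not compiled):
 */
#if 0
	unsigned int alignment = 4096;
	unsigned int sz = (5000 + alignment - 1) & ~(alignment - 1);

	assert(sz == 8192);
#endif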

/* Release the card storage associated with buf:
 */
static void
free_block(drm_intel_bufmgr_fake *bufmgr_fake, struct block *block,
	   int skip_dirty_copy)
{
	drm_intel_bo_fake *bo_fake;

	if (!block)
		return;

	DBG("free block %p %08x %d %d\n", block, block->mem->ofs,
	    block->on_hardware, block->fenced);

	bo_fake = (drm_intel_bo_fake *) block->bo;

	if (bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE))
		skip_dirty_copy = 1;

	if (!skip_dirty_copy && (bo_fake->card_dirty == 1)) {
		memcpy(bo_fake->backing_store, block->virtual, block->bo->size);
		bo_fake->card_dirty = 0;
		bo_fake->dirty = 1;
	}

	if (block->on_hardware) {
		block->bo = NULL;
	} else if (block->fenced) {
		block->bo = NULL;
	} else {
		DBG("    - free immediately\n");
		DRMLISTDEL(block);

		mmFreeMem(block->mem);
		free(block);
	}
}

static void
alloc_backing_store(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
	assert(!bo_fake->backing_store);
	assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));

	bo_fake->backing_store = malloc(bo->size);

	DBG("alloc_backing - buf %d %p %lu\n", bo_fake->id,
	    bo_fake->backing_store, bo->size);
	assert(bo_fake->backing_store);
}

static void
free_backing_store(drm_intel_bo *bo)
{
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	if (bo_fake->backing_store) {
		assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));
		free(bo_fake->backing_store);
		bo_fake->backing_store = NULL;
	}
}

static void
set_dirty(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	if (bo_fake->flags & BM_NO_BACKING_STORE
	    && bo_fake->invalidate_cb != NULL)
		bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr);

	assert(!(bo_fake->flags & BM_PINNED));

	DBG("set_dirty - buf %d\n", bo_fake->id);
	bo_fake->dirty = 1;
}

static int
evict_lru(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int max_fence)
{
	struct block *block, *tmp;

	DBG("%s\n", __func__);

	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;

		if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
			continue;

		if (block->fence && max_fence && !FENCE_LTE(block->fence,
							    max_fence))
			return 0;

		set_dirty(&bo_fake->bo);
		bo_fake->block = NULL;

		free_block(bufmgr_fake, block, 0);
		return 1;
	}

	return 0;
}

static int
evict_mru(drm_intel_bufmgr_fake *bufmgr_fake)
{
	struct block *block, *tmp;

	DBG("%s\n", __func__);

	DRMLISTFOREACHSAFEREVERSE(block, tmp, &bufmgr_fake->lru) {
		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;

		if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
			continue;

		set_dirty(&bo_fake->bo);
		bo_fake->block = NULL;

		free_block(bufmgr_fake, block, 0);
		return 1;
	}

	return 0;
}

/**
 * Removes all objects from the fenced list older than the given fence.
 */
static int
clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int fence_cookie)
{
	struct block *block, *tmp;
	int ret = 0;

	bufmgr_fake->last_fence = fence_cookie;
	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->fenced) {
		assert(block->fenced);

		if (_fence_test(bufmgr_fake, block->fence)) {

			block->fenced = 0;

			if (!block->bo) {
				DBG("delayed free: offset %x sz %x\n",
				    block->mem->ofs, block->mem->size);
				DRMLISTDEL(block);
				mmFreeMem(block->mem);
				free(block);
			} else {
				DBG("return to lru: offset %x sz %x\n",
				    block->mem->ofs, block->mem->size);
				DRMLISTDEL(block);
				DRMLISTADDTAIL(block, &bufmgr_fake->lru);
			}

			ret = 1;
		} else {
			/* Blocks are ordered by fence, so if one fails, all
			 * from here will fail also:
			 */
			DBG("fence not passed: offset %x sz %x %d %d\n",
			    block->mem->ofs, block->mem->size, block->fence,
			    bufmgr_fake->last_fence);
			break;
		}
	}

	DBG("%s: %d\n", __func__, ret);
	return ret;
}

static void
fence_blocks(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
{
	struct block *block, *tmp;

	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
		DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n",
		    block, block->mem->size, block->mem->ofs, block->bo, fence);
		block->fence = fence;

		block->on_hardware = 0;
		block->fenced = 1;

		/* Move to tail of pending list here
		 */
		DRMLISTDEL(block);
		DRMLISTADDTAIL(block, &bufmgr_fake->fenced);
	}

	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
}

static int
evict_and_alloc_block(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	assert(bo_fake->block == NULL);

	/* Search for already free memory:
	 */
	if (alloc_block(bo))
		return 1;

	/* If we're not thrashing, allow lru eviction to dig deeper into
	 * recently used textures.  We'll probably be thrashing soon:
	 */
	if (!bufmgr_fake->thrashing) {
		while (evict_lru(bufmgr_fake, 0))
			if (alloc_block(bo))
				return 1;
	}

	/* Keep thrashing counter alive?
	 */
	if (bufmgr_fake->thrashing)
		bufmgr_fake->thrashing = 20;

	/* Wait on any already pending fences - here we are waiting for any
	 * freed memory that has been submitted to hardware and fenced to
	 * become available:
	 */
	while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
		uint32_t fence = bufmgr_fake->fenced.next->fence;
		_fence_wait_internal(bufmgr_fake, fence);

		if (alloc_block(bo))
			return 1;
	}

	if (!DRMLISTEMPTY(&bufmgr_fake->on_hardware)) {
		while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
			uint32_t fence = bufmgr_fake->fenced.next->fence;
			_fence_wait_internal(bufmgr_fake, fence);
		}

		if (!bufmgr_fake->thrashing) {
			DBG("thrashing\n");
		}
		bufmgr_fake->thrashing = 20;

		if (alloc_block(bo))
			return 1;
	}

	while (evict_mru(bufmgr_fake))
		if (alloc_block(bo))
			return 1;

	DBG("%s 0x%lx bytes failed\n", __func__, bo->size);

	return 0;
}

/***********************************************************************
 * Public functions
 */

/**
 * Wait for hardware idle by emitting a fence and waiting for it.
 */
static void
drm_intel_bufmgr_fake_wait_idle(drm_intel_bufmgr_fake *bufmgr_fake)
{
	unsigned int cookie;

	cookie = _fence_emit_internal(bufmgr_fake);
	_fence_wait_internal(bufmgr_fake, cookie);
}

/**
 * Wait for rendering to a buffer to complete.
 *
 * It is assumed that the batchbuffer which performed the rendering included
 * the necessary flushing.
 */
static void
drm_intel_fake_bo_wait_rendering_locked(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	if (bo_fake->block == NULL || !bo_fake->block->fenced)
		return;

	_fence_wait_internal(bufmgr_fake, bo_fake->block->fence);
}

static void
drm_intel_fake_bo_wait_rendering(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;

	pthread_mutex_lock(&bufmgr_fake->lock);
	drm_intel_fake_bo_wait_rendering_locked(bo);
	pthread_mutex_unlock(&bufmgr_fake->lock);
}

/* Specifically ignore texture memory sharing.
 *  -- just evict everything
 *  -- and wait for idle
 */
void
drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
	struct block *block, *tmp;

	pthread_mutex_lock(&bufmgr_fake->lock);

	bufmgr_fake->need_fence = 1;
	bufmgr_fake->fail = 0;

	/* Wait for hardware idle.  We don't know where acceleration has been
	 * happening, so we'll need to wait anyway before letting anything get
	 * put on the card again.
	 */
	drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);

	/* Check that we hadn't released the lock without having fenced the last
	 * set of buffers.
	 */
	assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));

	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
		assert(_fence_test(bufmgr_fake, block->fence));
		set_dirty(block->bo);
	}

	pthread_mutex_unlock(&bufmgr_fake->lock);
}

static drm_intel_bo *
drm_intel_fake_bo_alloc(drm_intel_bufmgr *bufmgr,
			const char *name,
			unsigned long size,
			unsigned int alignment)
{
	drm_intel_bufmgr_fake *bufmgr_fake;
	drm_intel_bo_fake *bo_fake;

	bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;

	assert(size != 0);

	bo_fake = calloc(1, sizeof(*bo_fake));
	if (!bo_fake)
		return NULL;

	bo_fake->bo.size = size;
	bo_fake->bo.offset = -1;
	bo_fake->bo.virtual = NULL;
	bo_fake->bo.bufmgr = bufmgr;
	bo_fake->refcount = 1;

	/* Alignment must be a power of two */
	assert((alignment & (alignment - 1)) == 0);
	if (alignment == 0)
		alignment = 1;
	bo_fake->alignment = alignment;
	bo_fake->id = ++bufmgr_fake->buf_nr;
	bo_fake->name = name;
	bo_fake->flags = 0;
	bo_fake->is_static = 0;

	DBG("drm_bo_alloc: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
	    bo_fake->bo.size / 1024);

	return &bo_fake->bo;
}

static drm_intel_bo *
drm_intel_fake_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
			      const char *name,
			      int x, int y, int cpp,
			      uint32_t *tiling_mode,
			      unsigned long *pitch,
			      unsigned long flags)
{
	unsigned long stride, aligned_y;

	/* No runtime tiling support for fake. */
	*tiling_mode = I915_TILING_NONE;

	/* Align it for being a render target.  Shouldn't need anything else. */
	stride = x * cpp;
	stride = ROUND_UP_TO(stride, 64);

	/* 965 subspan loading alignment */
	aligned_y = ALIGN(y, 2);

	*pitch = stride;

	return drm_intel_fake_bo_alloc(bufmgr, name, stride * aligned_y,
				       4096);
}

drm_intel_bo *
drm_intel_bo_fake_alloc_static(drm_intel_bufmgr *bufmgr,
			       const char *name,
			       unsigned long offset,
			       unsigned long size, void *virtual)
{
	drm_intel_bufmgr_fake *bufmgr_fake;
	drm_intel_bo_fake *bo_fake;

	bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;

	assert(size != 0);

	bo_fake = calloc(1, sizeof(*bo_fake));
	if (!bo_fake)
		return NULL;

	bo_fake->bo.size = size;
	bo_fake->bo.offset = offset;
	bo_fake->bo.virtual = virtual;
	bo_fake->bo.bufmgr = bufmgr;
	bo_fake->refcount = 1;
	bo_fake->id = ++bufmgr_fake->buf_nr;
	bo_fake->name = name;
	bo_fake->flags = BM_PINNED;
	bo_fake->is_static = 1;

	DBG("drm_bo_alloc_static: (buf %d: %s, %lu kb)\n", bo_fake->id,
	    bo_fake->name, bo_fake->bo.size / 1024);

	return &bo_fake->bo;
}

static void
drm_intel_fake_bo_reference(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	pthread_mutex_lock(&bufmgr_fake->lock);
	bo_fake->refcount++;
	pthread_mutex_unlock(&bufmgr_fake->lock);
}

static void
drm_intel_fake_bo_reference_locked(drm_intel_bo *bo)
{
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	bo_fake->refcount++;
}

static void
drm_intel_fake_bo_unreference_locked(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
	int i;

	if (--bo_fake->refcount == 0) {
		assert(bo_fake->map_count == 0);
		/* No remaining references, so free it */
		if (bo_fake->block)
			free_block(bufmgr_fake, bo_fake->block, 1);
		free_backing_store(bo);

		for (i = 0; i < bo_fake->nr_relocs; i++)
			drm_intel_fake_bo_unreference_locked(bo_fake->relocs[i].
							     target_buf);

		DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id,
		    bo_fake->name);

		free(bo_fake->relocs);
		free(bo);
	}
}

static void
drm_intel_fake_bo_unreference(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;

	pthread_mutex_lock(&bufmgr_fake->lock);
	drm_intel_fake_bo_unreference_locked(bo);
	pthread_mutex_unlock(&bufmgr_fake->lock);
}

/**
 * Set the buffer as not requiring backing store, and instead get the callback
 * invoked whenever it would be set dirty.
 */
void
drm_intel_bo_fake_disable_backing_store(drm_intel_bo *bo,
					void (*invalidate_cb) (drm_intel_bo *bo,
							       void *ptr),
					void *ptr)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	pthread_mutex_lock(&bufmgr_fake->lock);

	if (bo_fake->backing_store)
		free_backing_store(bo);

	bo_fake->flags |= BM_NO_BACKING_STORE;

	DBG("disable_backing_store set buf %d dirty\n", bo_fake->id);
	bo_fake->dirty = 1;
	bo_fake->invalidate_cb = invalidate_cb;
	bo_fake->invalidate_ptr = ptr;

	/* Note that it is invalid right from the start.  Also note
	 * invalidate_cb is called with the bufmgr locked, so cannot
	 * itself make bufmgr calls.
	 */
	if (invalidate_cb != NULL)
		invalidate_cb(bo, ptr);

	pthread_mutex_unlock(&bufmgr_fake->lock);
}
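
/* Typical use: a BO whose contents the driver can regenerate is cheaper
 * to invalidate than to copy back to backing store.  A minimal sketch of
 * a caller (my_invalidate and struct my_state are hypothetical, not part
 * of this library):
 */
#if 0
static void my_invalidate(drm_intel_bo *bo, void *ptr)
{
	struct my_state *state = ptr;

	state->contents_valid = 0;	/* regenerate on next use */
}

	/* ...after allocating the BO: */
	drm_intel_bo_fake_disable_backing_store(bo, my_invalidate, state);
#endif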

/**
 * Map a buffer into bo->virtual, allocating either card memory space (If
 * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary.
 */
static int
drm_intel_fake_bo_map_locked(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	/* Static buffers are always mapped. */
	if (bo_fake->is_static) {
		if (bo_fake->card_dirty) {
			drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
			bo_fake->card_dirty = 0;
		}
		return 0;
	}

	/* Allow recursive mapping.  Mesa may recursively map buffers with
	 * nested display loops, and it is used internally in bufmgr_fake
	 * for relocation.
	 */
	if (bo_fake->map_count++ != 0)
		return 0;

	{
		DBG("drm_bo_map: (buf %d: %s, %lu kb)\n", bo_fake->id,
		    bo_fake->name, bo_fake->bo.size / 1024);

		if (bo->virtual != NULL) {
			drmMsg("%s: already mapped\n", __func__);
			abort();
		} else if (bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)) {

			if (!bo_fake->block && !evict_and_alloc_block(bo)) {
				DBG("%s: alloc failed\n", __func__);
				bufmgr_fake->fail = 1;
				return 1;
			} else {
				assert(bo_fake->block);
				bo_fake->dirty = 0;

				if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) &&
				    bo_fake->block->fenced) {
					drm_intel_fake_bo_wait_rendering_locked
					    (bo);
				}

				bo->virtual = bo_fake->block->virtual;
			}
		} else {
			if (write_enable)
				set_dirty(bo);

			if (bo_fake->backing_store == 0)
				alloc_backing_store(bo);

			if ((bo_fake->card_dirty == 1) && bo_fake->block) {
				if (bo_fake->block->fenced)
					drm_intel_fake_bo_wait_rendering_locked
					    (bo);

				memcpy(bo_fake->backing_store,
				       bo_fake->block->virtual,
				       bo_fake->block->bo->size);
				bo_fake->card_dirty = 0;
			}

			bo->virtual = bo_fake->backing_store;
		}
	}

	return 0;
}

static int
drm_intel_fake_bo_map(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	int ret;

	pthread_mutex_lock(&bufmgr_fake->lock);
	ret = drm_intel_fake_bo_map_locked(bo, write_enable);
	pthread_mutex_unlock(&bufmgr_fake->lock);

	return ret;
}

static int
drm_intel_fake_bo_unmap_locked(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	/* Static buffers are always mapped. */
	if (bo_fake->is_static)
		return 0;

	assert(bo_fake->map_count != 0);
	if (--bo_fake->map_count != 0)
		return 0;

	DBG("drm_bo_unmap: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
	    bo_fake->bo.size / 1024);

	bo->virtual = NULL;

	return 0;
}

static int
drm_intel_fake_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	int ret;

	pthread_mutex_lock(&bufmgr_fake->lock);
	ret = drm_intel_fake_bo_unmap_locked(bo);
	pthread_mutex_unlock(&bufmgr_fake->lock);

	return ret;
}

static int
drm_intel_fake_bo_subdata(drm_intel_bo *bo, unsigned long offset,
			  unsigned long size, const void *data)
{
	int ret;

	if (size == 0 || data == NULL)
		return 0;

	ret = drm_intel_bo_map(bo, 1);
	if (ret)
		return ret;
	memcpy((unsigned char *)bo->virtual + offset, data, size);
	drm_intel_bo_unmap(bo);
	return 0;
}
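
/* bo_subdata is just a map/memcpy/unmap convenience.  A minimal usage
 * sketch through the public API (buffer names are hypothetical):
 */
#if 0
	static const uint32_t quad[] = { 0x0, 0x1, 0x2, 0x3 };

	drm_intel_bo *vbo = drm_intel_bo_alloc(bufmgr, "verts",
					       sizeof(quad), 4096);

	drm_intel_bo_subdata(vbo, 0, sizeof(quad), quad);
#endif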

static void
drm_intel_fake_kick_all_locked(drm_intel_bufmgr_fake *bufmgr_fake)
{
	struct block *block, *tmp;

	bufmgr_fake->performed_rendering = 0;
	/* Kick off every BO that is currently on the hardware and release
	 * its block, marking it dirty so its contents get re-uploaded.
	 */
	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;

		block->on_hardware = 0;
		free_block(bufmgr_fake, block, 0);
		bo_fake->block = NULL;
		bo_fake->validated = 0;
		if (!(bo_fake->flags & BM_NO_BACKING_STORE))
			bo_fake->dirty = 1;
	}
}

static int
drm_intel_fake_bo_validate(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;

	bufmgr_fake = (drm_intel_bufmgr_fake *) bo->bufmgr;

	DBG("drm_bo_validate: (buf %d: %s, %lu kb)\n", bo_fake->id,
	    bo_fake->name, bo_fake->bo.size / 1024);

	/* Sanity check: Buffers should be unmapped before being validated.
	 * This is not so much of a problem for bufmgr_fake, but TTM refuses,
	 * and the problem is harder to debug there.
	 */
	assert(bo_fake->map_count == 0);

	if (bo_fake->is_static) {
		/* Add it to the needs-fence list */
		bufmgr_fake->need_fence = 1;
		return 0;
	}

	/* Allocate the card memory */
	if (!bo_fake->block && !evict_and_alloc_block(bo)) {
		bufmgr_fake->fail = 1;
		DBG("Failed to validate buf %d:%s\n", bo_fake->id,
		    bo_fake->name);
		return -1;
	}

	assert(bo_fake->block);
	assert(bo_fake->block->bo == &bo_fake->bo);

	bo->offset = bo_fake->block->mem->ofs;

	/* Upload the buffer contents if necessary */
	if (bo_fake->dirty) {
		DBG("Upload dirty buf %d:%s, sz %lu offset 0x%x\n", bo_fake->id,
		    bo_fake->name, bo->size, bo_fake->block->mem->ofs);

		assert(!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)));

		/* Actually, we should be able to just wait for a fence on the
		 * memory, which we would be tracking when we free it.
		 * Waiting for idle is a sufficiently large hammer for now.
		 */
		drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);

		/* We may never have mapped this BO, so it might not have any
		 * backing store.  That should be rare, but zero the card
		 * memory in any case.
		 */
		if (bo_fake->backing_store)
			memcpy(bo_fake->block->virtual, bo_fake->backing_store,
			       bo->size);
		else
			memset(bo_fake->block->virtual, 0, bo->size);

		bo_fake->dirty = 0;
	}

	bo_fake->block->fenced = 0;
	bo_fake->block->on_hardware = 1;
	DRMLISTDEL(bo_fake->block);
	DRMLISTADDTAIL(bo_fake->block, &bufmgr_fake->on_hardware);

	bo_fake->validated = 1;
	bufmgr_fake->need_fence = 1;

	return 0;
}

static void
drm_intel_fake_fence_validated(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
	unsigned int cookie;

	cookie = _fence_emit_internal(bufmgr_fake);
	fence_blocks(bufmgr_fake, cookie);

	DBG("drm_fence_validated: 0x%08x cookie\n", cookie);
}

static void
drm_intel_fake_destroy(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;

	pthread_mutex_destroy(&bufmgr_fake->lock);
	mmDestroy(bufmgr_fake->heap);
	free(bufmgr);
}

static int
drm_intel_fake_emit_reloc(drm_intel_bo *bo, uint32_t offset,
			  drm_intel_bo *target_bo, uint32_t target_offset,
			  uint32_t read_domains, uint32_t write_domain)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	struct fake_buffer_reloc *r;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
	drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *) target_bo;
	int i;

	pthread_mutex_lock(&bufmgr_fake->lock);

	assert(bo);
	assert(target_bo);

	if (bo_fake->relocs == NULL) {
		bo_fake->relocs =
		    malloc(sizeof(struct fake_buffer_reloc) * MAX_RELOCS);
		if (bo_fake->relocs == NULL) {
			pthread_mutex_unlock(&bufmgr_fake->lock);
			return -ENOMEM;
		}
	}

	r = &bo_fake->relocs[bo_fake->nr_relocs++];

	assert(bo_fake->nr_relocs <= MAX_RELOCS);

	drm_intel_fake_bo_reference_locked(target_bo);

	if (!target_fake->is_static) {
		bo_fake->child_size +=
		    ALIGN(target_bo->size, target_fake->alignment);
		bo_fake->child_size += target_fake->child_size;
	}
	r->target_buf = target_bo;
	r->offset = offset;
	r->last_target_offset = target_bo->offset;
	r->delta = target_offset;
	r->read_domains = read_domains;
	r->write_domain = write_domain;

	if (bufmgr_fake->debug) {
		/* Check that a conflicting relocation hasn't already been
		 * emitted.
		 */
		for (i = 0; i < bo_fake->nr_relocs - 1; i++) {
			struct fake_buffer_reloc *r2 = &bo_fake->relocs[i];

			assert(r->offset != r2->offset);
		}
	}

	pthread_mutex_unlock(&bufmgr_fake->lock);

	return 0;
}

/**
 * Incorporates the validation flags associated with each relocation into
 * the combined validation flags for the buffer on this batchbuffer submission.
 */
static void
drm_intel_fake_calculate_domains(drm_intel_bo *bo)
{
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
	int i;

	for (i = 0; i < bo_fake->nr_relocs; i++) {
		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
		drm_intel_bo_fake *target_fake =
		    (drm_intel_bo_fake *) r->target_buf;

		/* Do the same for the tree of buffers we depend on */
		drm_intel_fake_calculate_domains(r->target_buf);

		target_fake->read_domains |= r->read_domains;
		target_fake->write_domain |= r->write_domain;
	}
}

static int
drm_intel_fake_reloc_and_validate_buffer(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
	int i, ret;

	assert(bo_fake->map_count == 0);

	for (i = 0; i < bo_fake->nr_relocs; i++) {
		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
		drm_intel_bo_fake *target_fake =
		    (drm_intel_bo_fake *) r->target_buf;
		uint32_t reloc_data;

		/* Validate the target buffer if that hasn't been done. */
		if (!target_fake->validated) {
			ret =
			    drm_intel_fake_reloc_and_validate_buffer(r->target_buf);
			if (ret != 0) {
				if (bo->virtual != NULL)
					drm_intel_fake_bo_unmap_locked(bo);
				return ret;
			}
		}

		/* Calculate the value of the relocation entry. */
		if (r->target_buf->offset != r->last_target_offset) {
			reloc_data = r->target_buf->offset + r->delta;

			if (bo->virtual == NULL)
				drm_intel_fake_bo_map_locked(bo, 1);

			*(uint32_t *) ((uint8_t *) bo->virtual + r->offset) =
			    reloc_data;

			r->last_target_offset = r->target_buf->offset;
		}
	}

	if (bo->virtual != NULL)
		drm_intel_fake_bo_unmap_locked(bo);

	if (bo_fake->write_domain != 0) {
		if (!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED))) {
			if (bo_fake->backing_store == 0)
				alloc_backing_store(bo);
		}
		bo_fake->card_dirty = 1;
		bufmgr_fake->performed_rendering = 1;
	}

	return drm_intel_fake_bo_validate(bo);
}

static void
drm_intel_bo_fake_post_submit(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
	int i;

	for (i = 0; i < bo_fake->nr_relocs; i++) {
		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
		drm_intel_bo_fake *target_fake =
		    (drm_intel_bo_fake *) r->target_buf;

		if (target_fake->validated)
			drm_intel_bo_fake_post_submit(r->target_buf);

		DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n",
		    bo_fake->name, (uint32_t) bo->offset, r->offset,
		    target_fake->name, (uint32_t) r->target_buf->offset,
		    r->delta);
	}

	assert(bo_fake->map_count == 0);
	bo_fake->validated = 0;
	bo_fake->read_domains = 0;
	bo_fake->write_domain = 0;
}

void
drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr *bufmgr,
					int (*exec) (drm_intel_bo *bo,
						     unsigned int used,
						     void *priv),
					void *priv)
{
	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;

	bufmgr_fake->exec = exec;
	bufmgr_fake->exec_priv = priv;
}
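
/* Like the fence callback, this lets a driver submit batchbuffers itself
 * instead of going through DRM_I915_BATCHBUFFER.  A minimal sketch
 * (my_exec and my_hw_submit are hypothetical driver functions, not part
 * of this library):
 */
#if 0
static int my_exec(drm_intel_bo *bo, unsigned int used, void *priv)
{
	struct my_driver *drv = priv;

	/* bo->offset is valid here; hand it to the hardware. */
	return my_hw_submit(drv, bo->offset, used);
}

	/* ...during setup: */
	drm_intel_bufmgr_fake_set_exec_callback(bufmgr, my_exec, drv);
#endif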

static int
drm_intel_fake_bo_exec(drm_intel_bo *bo, int used,
		       drm_clip_rect_t *cliprects, int num_cliprects, int DR4)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo->bufmgr;
	drm_intel_bo_fake *batch_fake = (drm_intel_bo_fake *) bo;
	struct drm_i915_batchbuffer batch;
	int ret;
	int retry_count = 0;

	pthread_mutex_lock(&bufmgr_fake->lock);

	bufmgr_fake->performed_rendering = 0;

	drm_intel_fake_calculate_domains(bo);

	batch_fake->read_domains = I915_GEM_DOMAIN_COMMAND;

	/* If we've run out of RAM, blow the whole lot away and retry. */
restart:
	ret = drm_intel_fake_reloc_and_validate_buffer(bo);
	if (bufmgr_fake->fail == 1) {
		if (retry_count == 0) {
			retry_count++;
			drm_intel_fake_kick_all_locked(bufmgr_fake);
			bufmgr_fake->fail = 0;
			goto restart;
		} else		/* dump out the memory here */
			mmDumpMemInfo(bufmgr_fake->heap);
	}

	assert(ret == 0);

	if (bufmgr_fake->exec != NULL) {
		ret = bufmgr_fake->exec(bo, used, bufmgr_fake->exec_priv);
		if (ret != 0) {
			pthread_mutex_unlock(&bufmgr_fake->lock);
			return ret;
		}
	} else {
		batch.start = bo->offset;
		batch.used = used;
		batch.cliprects = cliprects;
		batch.num_cliprects = num_cliprects;
		batch.DR1 = 0;
		batch.DR4 = DR4;

		if (drmCommandWrite
		    (bufmgr_fake->fd, DRM_I915_BATCHBUFFER, &batch,
		     sizeof(batch))) {
			drmMsg("DRM_I915_BATCHBUFFER: %d\n", -errno);
			pthread_mutex_unlock(&bufmgr_fake->lock);
			return -errno;
		}
	}

	drm_intel_fake_fence_validated(bo->bufmgr);

	drm_intel_bo_fake_post_submit(bo);

	pthread_mutex_unlock(&bufmgr_fake->lock);

	return 0;
}

/**
 * Return an error if the list of BOs will exceed the aperture size.
 *
 * This is a rough guess, and validation may still fail even when this check
 * passes: during the validate sequence we may place a buffer in an
 * inopportune spot early on and then fail to fit a set smaller than the
 * aperture.
 */
static int
drm_intel_fake_check_aperture_space(drm_intel_bo **bo_array, int count)
{
	drm_intel_bufmgr_fake *bufmgr_fake =
	    (drm_intel_bufmgr_fake *) bo_array[0]->bufmgr;
	unsigned int sz = 0;
	int i;

	for (i = 0; i < count; i++) {
		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo_array[i];

		if (bo_fake == NULL)
			continue;

		if (!bo_fake->is_static)
			sz += ALIGN(bo_array[i]->size, bo_fake->alignment);
		sz += bo_fake->child_size;
	}

	if (sz > bufmgr_fake->size) {
		DBG("check_space: overflowed bufmgr size, %ukb vs %lukb\n",
		    sz / 1024, bufmgr_fake->size / 1024);
		return -1;
	}

	DBG("drm_check_space: sz %ukb vs bufmgr %lukb\n", sz / 1024,
	    bufmgr_fake->size / 1024);
	return 0;
}
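
/* Typical use in a driver: before emitting state that references a set of
 * BOs, make sure they can all fit; if not, flush the batch first.  A
 * minimal sketch through the public API (all names other than the check
 * call are hypothetical):
 */
#if 0
	drm_intel_bo *bos[] = { batch_bo, texture_bo, vertex_bo };

	if (drm_intel_bufmgr_check_aperture_space(bos, 3) != 0)
		my_flush_batch(drv);	/* free up aperture, then retry */
#endif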

/**
 * Evicts all buffers, waiting for fences to pass and copying contents out
 * as necessary.
 *
 * Used by the X Server on LeaveVT, when the card memory is no longer our
 * own.
 */
void
drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
	struct block *block, *tmp;

	pthread_mutex_lock(&bufmgr_fake->lock);

	bufmgr_fake->need_fence = 1;
	bufmgr_fake->fail = 0;

	/* Wait for hardware idle.  We don't know where acceleration has been
	 * happening, so we'll need to wait anyway before letting anything get
	 * put on the card again.
	 */
	drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);

	/* Check that we hadn't released the lock without having fenced the last
	 * set of buffers.
	 */
	assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));

	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
		/* Releases the memory, and memcpys dirty contents out if
		 * necessary.
		 */
		free_block(bufmgr_fake, block, 0);
		bo_fake->block = NULL;
	}

	pthread_mutex_unlock(&bufmgr_fake->lock);
}

void
drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr *bufmgr,
					volatile unsigned int
					*last_dispatch)
{
	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;

	bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
}

drm_intel_bufmgr *
drm_intel_bufmgr_fake_init(int fd, unsigned long low_offset,
			   void *low_virtual, unsigned long size,
			   volatile unsigned int *last_dispatch)
{
	drm_intel_bufmgr_fake *bufmgr_fake;

	bufmgr_fake = calloc(1, sizeof(*bufmgr_fake));
	if (bufmgr_fake == NULL)
		return NULL;

	if (pthread_mutex_init(&bufmgr_fake->lock, NULL) != 0) {
		free(bufmgr_fake);
		return NULL;
	}

	/* Initialize allocator */
	DRMINITLISTHEAD(&bufmgr_fake->fenced);
	DRMINITLISTHEAD(&bufmgr_fake->on_hardware);
	DRMINITLISTHEAD(&bufmgr_fake->lru);

	bufmgr_fake->low_offset = low_offset;
	bufmgr_fake->virtual = low_virtual;
	bufmgr_fake->size = size;
	bufmgr_fake->heap = mmInit(low_offset, size);

	/* Hook in methods */
	bufmgr_fake->bufmgr.bo_alloc = drm_intel_fake_bo_alloc;
	bufmgr_fake->bufmgr.bo_alloc_for_render = drm_intel_fake_bo_alloc;
	bufmgr_fake->bufmgr.bo_alloc_tiled = drm_intel_fake_bo_alloc_tiled;
	bufmgr_fake->bufmgr.bo_reference = drm_intel_fake_bo_reference;
	bufmgr_fake->bufmgr.bo_unreference = drm_intel_fake_bo_unreference;
	bufmgr_fake->bufmgr.bo_map = drm_intel_fake_bo_map;
	bufmgr_fake->bufmgr.bo_unmap = drm_intel_fake_bo_unmap;
	bufmgr_fake->bufmgr.bo_subdata = drm_intel_fake_bo_subdata;
	bufmgr_fake->bufmgr.bo_wait_rendering =
	    drm_intel_fake_bo_wait_rendering;
	bufmgr_fake->bufmgr.bo_emit_reloc = drm_intel_fake_emit_reloc;
	bufmgr_fake->bufmgr.destroy = drm_intel_fake_destroy;
	bufmgr_fake->bufmgr.bo_exec = drm_intel_fake_bo_exec;
	bufmgr_fake->bufmgr.check_aperture_space =
	    drm_intel_fake_check_aperture_space;
	bufmgr_fake->bufmgr.debug = 0;

	bufmgr_fake->fd = fd;
	bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;

	return &bufmgr_fake->bufmgr;
}
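
/* End-to-end usage: the fake bufmgr manages a fixed aperture range that
 * the caller has already mapped.  A minimal setup sketch (the aperture
 * numbers and the aperture_map variable are hypothetical):
 */
#if 0
	/* Manage 8 MB of GART space starting at offset 0, already mapped
	 * at aperture_map; last_dispatch points into the sarea.
	 */
	drm_intel_bufmgr *bufmgr =
	    drm_intel_bufmgr_fake_init(fd, 0, aperture_map, 8 * 1024 * 1024,
				       last_dispatch);

	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "test", 4096, 4096);

	drm_intel_bo_map(bo, 1);
	memset(bo->virtual, 0, 4096);
	drm_intel_bo_unmap(bo);

	drm_intel_bo_unreference(bo);
	drm_intel_bufmgr_destroy(bufmgr);
#endif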