1 /**************************************************************************
2  *
3  * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /* Originally a fake version of the buffer manager so that we can
29  * prototype the changes in a driver fairly quickly, has been fleshed
30  * out to a fully functional interim solution.
31  *
32  * Basically wraps the old style memory management in the new
33  * programming interface, but is more expressive and avoids many of
34  * the bugs in the old texture manager.
35  */
36 
37 #include <stdlib.h>
38 #include <string.h>
39 #include <assert.h>
40 #include <errno.h>
41 #include <strings.h>
42 #include <xf86drm.h>
43 #include <pthread.h>
44 #include "intel_bufmgr.h"
45 #include "intel_bufmgr_priv.h"
46 #include "drm.h"
47 #include "i915_drm.h"
48 #include "mm.h"
49 #include "libdrm_macros.h"
50 #include "libdrm_lists.h"
51 
/*
 * Debug trace helper.  Forwards its arguments to drmMsg() when the debug
 * field of the base bufmgr is non-zero.  The macro refers to a variable
 * named `bufmgr_fake`, so it can only be used where one is in scope.
 */
#define DBG(...)					\
	do {						\
		if (bufmgr_fake->bufmgr.debug)		\
			drmMsg(__VA_ARGS__);		\
	} while (0)

/*
 * Internal flag bits stored in a fake buffer object's flags field.
 */
#define BM_NO_BACKING_STORE			0x00000001
#define BM_NO_FENCE_SUBDATA			0x00000002
#define BM_PINNED				0x00000004
63 /* Wrapper around mm.c's mem_block, which understands that you must
64  * wait for fences to expire before memory can be freed.  This is
65  * specific to our use of memcpy for uploads - an upload that was
66  * processed through the command queue wouldn't need to care about
67  * fences.
68  */
69 #define MAX_RELOCS 4096
70 
71 struct fake_buffer_reloc {
72 	/** Buffer object that the relocation points at. */
73 	drm_intel_bo *target_buf;
74 	/** Offset of the relocation entry within reloc_buf. */
75 	uint32_t offset;
76 	/**
77 	 * Cached value of the offset when we last performed this relocation.
78 	 */
79 	uint32_t last_target_offset;
80 	/** Value added to target_buf's offset to get the relocation entry. */
81 	uint32_t delta;
82 	/** Cache domains the target buffer is read into. */
83 	uint32_t read_domains;
84 	/** Cache domain the target buffer will have dirty cachelines in. */
85 	uint32_t write_domain;
86 };
87 
88 struct block {
89 	struct block *next, *prev;
90 	struct mem_block *mem;	/* BM_MEM_AGP */
91 
92 	/**
93 	 * Marks that the block is currently in the aperture and has yet to be
94 	 * fenced.
95 	 */
96 	unsigned on_hardware:1;
97 	/**
98 	 * Marks that the block is currently fenced (being used by rendering)
99 	 * and can't be freed until @fence is passed.
100 	 */
101 	unsigned fenced:1;
102 
103 	/** Fence cookie for the block. */
104 	unsigned fence;		/* Split to read_fence, write_fence */
105 
106 	drm_intel_bo *bo;
107 	void *virtual;
108 };
109 
110 typedef struct _bufmgr_fake {
111 	drm_intel_bufmgr bufmgr;
112 
113 	pthread_mutex_t lock;
114 
115 	unsigned long low_offset;
116 	unsigned long size;
117 	void *virtual;
118 
119 	struct mem_block *heap;
120 
121 	unsigned buf_nr;	/* for generating ids */
122 
123 	/**
124 	 * List of blocks which are currently in the GART but haven't been
125 	 * fenced yet.
126 	 */
127 	struct block on_hardware;
128 	/**
129 	 * List of blocks which are in the GART and have an active fence on
130 	 * them.
131 	 */
132 	struct block fenced;
133 	/**
134 	 * List of blocks which have an expired fence and are ready to be
135 	 * evicted.
136 	 */
137 	struct block lru;
138 
139 	unsigned int last_fence;
140 
141 	unsigned fail:1;
142 	unsigned need_fence:1;
143 	int thrashing;
144 
145 	/**
146 	 * Driver callback to emit a fence, returning the cookie.
147 	 *
148 	 * This allows the driver to hook in a replacement for the DRM usage in
149 	 * bufmgr_fake.
150 	 *
151 	 * Currently, this also requires that a write flush be emitted before
152 	 * emitting the fence, but this should change.
153 	 */
154 	unsigned int (*fence_emit) (void *private);
155 	/** Driver callback to wait for a fence cookie to have passed. */
156 	void (*fence_wait) (unsigned int fence, void *private);
157 	void *fence_priv;
158 
159 	/**
160 	 * Driver callback to execute a buffer.
161 	 *
162 	 * This allows the driver to hook in a replacement for the DRM usage in
163 	 * bufmgr_fake.
164 	 */
165 	int (*exec) (drm_intel_bo *bo, unsigned int used, void *priv);
166 	void *exec_priv;
167 
168 	/** Driver-supplied argument to driver callbacks */
169 	void *driver_priv;
170 	/**
171 	 * Pointer to kernel-updated sarea data for the last completed user irq
172 	 */
173 	volatile int *last_dispatch;
174 
175 	int fd;
176 
177 	int debug;
178 
179 	int performed_rendering;
180 } drm_intel_bufmgr_fake;
181 
182 typedef struct _drm_intel_bo_fake {
183 	drm_intel_bo bo;
184 
185 	unsigned id;		/* debug only */
186 	const char *name;
187 
188 	unsigned dirty:1;
189 	/**
190 	 * has the card written to this buffer - we make need to copy it back
191 	 */
192 	unsigned card_dirty:1;
193 	unsigned int refcount;
194 	/* Flags may consist of any of the DRM_BO flags, plus
195 	 * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the
196 	 * first two driver private flags.
197 	 */
198 	uint64_t flags;
199 	/** Cache domains the target buffer is read into. */
200 	uint32_t read_domains;
201 	/** Cache domain the target buffer will have dirty cachelines in. */
202 	uint32_t write_domain;
203 
204 	unsigned int alignment;
205 	int is_static, validated;
206 	unsigned int map_count;
207 
208 	/** relocation list */
209 	struct fake_buffer_reloc *relocs;
210 	int nr_relocs;
211 	/**
212 	 * Total size of the target_bos of this buffer.
213 	 *
214 	 * Used for estimation in check_aperture.
215 	 */
216 	unsigned int child_size;
217 
218 	struct block *block;
219 	void *backing_store;
220 	void (*invalidate_cb) (drm_intel_bo *bo, void *ptr);
221 	void *invalidate_ptr;
222 } drm_intel_bo_fake;
223 
224 static int clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake,
225 			unsigned int fence_cookie);
226 
#define MAXFENCE 0x7fffffff

/**
 * Wrap-aware "a <= b" comparison for fence cookies.
 *
 * Returns 1 when a equals b, when b is numerically ahead of a by less
 * than 1 << 24, or when a is numerically above b but the distance from a
 * up to MAXFENCE plus b is below 1 << 24 (b is treated as having wrapped).
 * Returns 0 in every other case.
 */
static int
FENCE_LTE(unsigned a, unsigned b)
{
	const unsigned window = 1 << 24;

	if (a < b)
		return b - a < window;

	if (a > b)
		return MAXFENCE - a + b < window;

	return 1;
}
243 
244 drm_public void
drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr * bufmgr,unsigned int (* emit)(void * priv),void (* wait)(unsigned int fence,void * priv),void * priv)245 drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr *bufmgr,
246 					 unsigned int (*emit) (void *priv),
247 					 void (*wait) (unsigned int fence,
248 						       void *priv),
249 					 void *priv)
250 {
251 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
252 
253 	bufmgr_fake->fence_emit = emit;
254 	bufmgr_fake->fence_wait = wait;
255 	bufmgr_fake->fence_priv = priv;
256 }
257 
258 static unsigned int
_fence_emit_internal(drm_intel_bufmgr_fake * bufmgr_fake)259 _fence_emit_internal(drm_intel_bufmgr_fake *bufmgr_fake)
260 {
261 	struct drm_i915_irq_emit ie;
262 	int ret, seq = 1;
263 
264 	if (bufmgr_fake->fence_emit != NULL) {
265 		seq = bufmgr_fake->fence_emit(bufmgr_fake->fence_priv);
266 		return seq;
267 	}
268 
269 	ie.irq_seq = &seq;
270 	ret = drmCommandWriteRead(bufmgr_fake->fd, DRM_I915_IRQ_EMIT,
271 				  &ie, sizeof(ie));
272 	if (ret) {
273 		drmMsg("%s: drm_i915_irq_emit: %d\n", __func__, ret);
274 		abort();
275 	}
276 
277 	DBG("emit 0x%08x\n", seq);
278 	return seq;
279 }
280 
281 static void
_fence_wait_internal(drm_intel_bufmgr_fake * bufmgr_fake,int seq)282 _fence_wait_internal(drm_intel_bufmgr_fake *bufmgr_fake, int seq)
283 {
284 	struct drm_i915_irq_wait iw;
285 	int hw_seq, busy_count = 0;
286 	int ret;
287 	int kernel_lied;
288 
289 	if (bufmgr_fake->fence_wait != NULL) {
290 		bufmgr_fake->fence_wait(seq, bufmgr_fake->fence_priv);
291 		clear_fenced(bufmgr_fake, seq);
292 		return;
293 	}
294 
295 	iw.irq_seq = seq;
296 
297 	DBG("wait 0x%08x\n", iw.irq_seq);
298 
299 	/* The kernel IRQ_WAIT implementation is all sorts of broken.
300 	 * 1) It returns 1 to 0x7fffffff instead of using the full 32-bit
301 	 *    unsigned range.
302 	 * 2) It returns 0 if hw_seq >= seq, not seq - hw_seq < 0 on the 32-bit
303 	 *    signed range.
304 	 * 3) It waits if seq < hw_seq, not seq - hw_seq > 0 on the 32-bit
305 	 *    signed range.
306 	 * 4) It returns -EBUSY in 3 seconds even if the hardware is still
307 	 *    successfully chewing through buffers.
308 	 *
309 	 * Assume that in userland we treat sequence numbers as ints, which
310 	 * makes some of the comparisons convenient, since the sequence
311 	 * numbers are all positive signed integers.
312 	 *
313 	 * From this we get several cases we need to handle.  Here's a timeline.
314 	 * 0x2   0x7                                    0x7ffffff8   0x7ffffffd
315 	 *   |    |                                             |    |
316 	 * ------------------------------------------------------------
317 	 *
318 	 * A) Normal wait for hw to catch up
319 	 * hw_seq seq
320 	 *   |    |
321 	 * ------------------------------------------------------------
322 	 * seq - hw_seq = 5.  If we call IRQ_WAIT, it will wait for hw to
323 	 * catch up.
324 	 *
325 	 * B) Normal wait for a sequence number that's already passed.
326 	 * seq    hw_seq
327 	 *   |    |
328 	 * ------------------------------------------------------------
329 	 * seq - hw_seq = -5.  If we call IRQ_WAIT, it returns 0 quickly.
330 	 *
331 	 * C) Hardware has already wrapped around ahead of us
332 	 * hw_seq                                                    seq
333 	 *   |                                                       |
334 	 * ------------------------------------------------------------
335 	 * seq - hw_seq = 0x80000000 - 5.  If we called IRQ_WAIT, it would wait
336 	 * for hw_seq >= seq, which may never occur.  Thus, we want to catch
337 	 * this in userland and return 0.
338 	 *
339 	 * D) We've wrapped around ahead of the hardware.
340 	 * seq                                                      hw_seq
341 	 *   |                                                       |
342 	 * ------------------------------------------------------------
343 	 * seq - hw_seq = -(0x80000000 - 5).  If we called IRQ_WAIT, it would
344 	 * return 0 quickly because hw_seq >= seq, even though the hardware
345 	 * isn't caught up. Thus, we need to catch this early return in
346 	 * userland and bother the kernel until the hardware really does
347 	 * catch up.
348 	 *
349 	 * E) Hardware might wrap after we test in userland.
350 	 *                                                  hw_seq  seq
351 	 *                                                      |    |
352 	 * ------------------------------------------------------------
353 	 * seq - hw_seq = 5.  If we call IRQ_WAIT, it will likely see seq >=
354 	 * hw_seq and wait.  However, suppose hw_seq wraps before we make it
355 	 * into the kernel.  The kernel sees hw_seq >= seq and waits for 3
356 	 * seconds then returns -EBUSY.  This is case C).  We should catch
357 	 * this and then return successfully.
358 	 *
359 	 * F) Hardware might take a long time on a buffer.
360 	 * hw_seq seq
361 	 *   |    |
362 	 * -------------------------------------------------------------------
363 	 * seq - hw_seq = 5.  If we call IRQ_WAIT, if sequence 2 through 5
364 	 * take too long, it will return -EBUSY.  Batchbuffers in the
365 	 * gltestperf demo were seen to take up to 7 seconds.  We should
366 	 * catch early -EBUSY return and keep trying.
367 	 */
368 
369 	do {
370 		/* Keep a copy of last_dispatch so that if the wait -EBUSYs
371 		 * because the hardware didn't catch up in 3 seconds, we can
372 		 * see if it at least made progress and retry.
373 		 */
374 		hw_seq = *bufmgr_fake->last_dispatch;
375 
376 		/* Catch case C */
377 		if (seq - hw_seq > 0x40000000)
378 			return;
379 
380 		ret = drmCommandWrite(bufmgr_fake->fd, DRM_I915_IRQ_WAIT,
381 				      &iw, sizeof(iw));
382 		/* Catch case D */
383 		kernel_lied = (ret == 0) && (seq - *bufmgr_fake->last_dispatch <
384 					     -0x40000000);
385 
386 		/* Catch case E */
387 		if (ret == -EBUSY
388 		    && (seq - *bufmgr_fake->last_dispatch > 0x40000000))
389 			ret = 0;
390 
391 		/* Catch case F: Allow up to 15 seconds chewing on one buffer. */
392 		if ((ret == -EBUSY) && (hw_seq != *bufmgr_fake->last_dispatch))
393 			busy_count = 0;
394 		else
395 			busy_count++;
396 	} while (kernel_lied || ret == -EAGAIN || ret == -EINTR ||
397 		 (ret == -EBUSY && busy_count < 5));
398 
399 	if (ret != 0) {
400 		drmMsg("%s:%d: Error waiting for fence: %s.\n", __FILE__,
401 		       __LINE__, strerror(-ret));
402 		abort();
403 	}
404 	clear_fenced(bufmgr_fake, seq);
405 }
406 
407 static int
_fence_test(drm_intel_bufmgr_fake * bufmgr_fake,unsigned fence)408 _fence_test(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
409 {
410 	/* Slight problem with wrap-around:
411 	 */
412 	return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence);
413 }
414 
415 /**
416  * Allocate a memory manager block for the buffer.
417  */
418 static int
alloc_block(drm_intel_bo * bo)419 alloc_block(drm_intel_bo *bo)
420 {
421 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
422 	drm_intel_bufmgr_fake *bufmgr_fake =
423 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
424 	struct block *block = (struct block *)calloc(sizeof *block, 1);
425 	unsigned int align_log2 = ffs(bo_fake->alignment) - 1;
426 	unsigned int sz;
427 
428 	if (!block)
429 		return 1;
430 
431 	sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1);
432 
433 	block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0);
434 	if (!block->mem) {
435 		free(block);
436 		return 0;
437 	}
438 
439 	DRMINITLISTHEAD(block);
440 
441 	/* Insert at head or at tail??? */
442 	DRMLISTADDTAIL(block, &bufmgr_fake->lru);
443 
444 	block->virtual = (uint8_t *) bufmgr_fake->virtual +
445 	    block->mem->ofs - bufmgr_fake->low_offset;
446 	block->bo = bo;
447 
448 	bo_fake->block = block;
449 
450 	return 1;
451 }
452 
453 /* Release the card storage associated with buf:
454  */
455 static void
free_block(drm_intel_bufmgr_fake * bufmgr_fake,struct block * block,int skip_dirty_copy)456 free_block(drm_intel_bufmgr_fake *bufmgr_fake, struct block *block,
457 	   int skip_dirty_copy)
458 {
459 	drm_intel_bo_fake *bo_fake;
460 	DBG("free block %p %08x %d %d\n", block, block->mem->ofs,
461 	    block->on_hardware, block->fenced);
462 
463 	if (!block)
464 		return;
465 
466 	bo_fake = (drm_intel_bo_fake *) block->bo;
467 
468 	if (bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE))
469 		skip_dirty_copy = 1;
470 
471 	if (!skip_dirty_copy && (bo_fake->card_dirty == 1)) {
472 		memcpy(bo_fake->backing_store, block->virtual, block->bo->size);
473 		bo_fake->card_dirty = 0;
474 		bo_fake->dirty = 1;
475 	}
476 
477 	if (block->on_hardware) {
478 		block->bo = NULL;
479 	} else if (block->fenced) {
480 		block->bo = NULL;
481 	} else {
482 		DBG("    - free immediately\n");
483 		DRMLISTDEL(block);
484 
485 		mmFreeMem(block->mem);
486 		free(block);
487 	}
488 }
489 
490 static void
alloc_backing_store(drm_intel_bo * bo)491 alloc_backing_store(drm_intel_bo *bo)
492 {
493 	drm_intel_bufmgr_fake *bufmgr_fake =
494 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
495 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
496 	assert(!bo_fake->backing_store);
497 	assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));
498 
499 	bo_fake->backing_store = malloc(bo->size);
500 
501 	DBG("alloc_backing - buf %d %p %lu\n", bo_fake->id,
502 	    bo_fake->backing_store, bo->size);
503 	assert(bo_fake->backing_store);
504 }
505 
506 static void
free_backing_store(drm_intel_bo * bo)507 free_backing_store(drm_intel_bo *bo)
508 {
509 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
510 
511 	if (bo_fake->backing_store) {
512 		assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));
513 		free(bo_fake->backing_store);
514 		bo_fake->backing_store = NULL;
515 	}
516 }
517 
518 static void
set_dirty(drm_intel_bo * bo)519 set_dirty(drm_intel_bo *bo)
520 {
521 	drm_intel_bufmgr_fake *bufmgr_fake =
522 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
523 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
524 
525 	if (bo_fake->flags & BM_NO_BACKING_STORE
526 	    && bo_fake->invalidate_cb != NULL)
527 		bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr);
528 
529 	assert(!(bo_fake->flags & BM_PINNED));
530 
531 	DBG("set_dirty - buf %d\n", bo_fake->id);
532 	bo_fake->dirty = 1;
533 }
534 
535 static int
evict_lru(drm_intel_bufmgr_fake * bufmgr_fake,unsigned int max_fence)536 evict_lru(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int max_fence)
537 {
538 	struct block *block, *tmp;
539 
540 	DBG("%s\n", __func__);
541 
542 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
543 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
544 
545 		if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
546 			continue;
547 
548 		if (block->fence && max_fence && !FENCE_LTE(block->fence,
549 							    max_fence))
550 			return 0;
551 
552 		set_dirty(&bo_fake->bo);
553 		bo_fake->block = NULL;
554 
555 		free_block(bufmgr_fake, block, 0);
556 		return 1;
557 	}
558 
559 	return 0;
560 }
561 
562 static int
evict_mru(drm_intel_bufmgr_fake * bufmgr_fake)563 evict_mru(drm_intel_bufmgr_fake *bufmgr_fake)
564 {
565 	struct block *block, *tmp;
566 
567 	DBG("%s\n", __func__);
568 
569 	DRMLISTFOREACHSAFEREVERSE(block, tmp, &bufmgr_fake->lru) {
570 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
571 
572 		if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
573 			continue;
574 
575 		set_dirty(&bo_fake->bo);
576 		bo_fake->block = NULL;
577 
578 		free_block(bufmgr_fake, block, 0);
579 		return 1;
580 	}
581 
582 	return 0;
583 }
584 
585 /**
586  * Removes all objects from the fenced list older than the given fence.
587  */
588 static int
clear_fenced(drm_intel_bufmgr_fake * bufmgr_fake,unsigned int fence_cookie)589 clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int fence_cookie)
590 {
591 	struct block *block, *tmp;
592 	int ret = 0;
593 
594 	bufmgr_fake->last_fence = fence_cookie;
595 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->fenced) {
596 		assert(block->fenced);
597 
598 		if (_fence_test(bufmgr_fake, block->fence)) {
599 
600 			block->fenced = 0;
601 
602 			if (!block->bo) {
603 				DBG("delayed free: offset %x sz %x\n",
604 				    block->mem->ofs, block->mem->size);
605 				DRMLISTDEL(block);
606 				mmFreeMem(block->mem);
607 				free(block);
608 			} else {
609 				DBG("return to lru: offset %x sz %x\n",
610 				    block->mem->ofs, block->mem->size);
611 				DRMLISTDEL(block);
612 				DRMLISTADDTAIL(block, &bufmgr_fake->lru);
613 			}
614 
615 			ret = 1;
616 		} else {
617 			/* Blocks are ordered by fence, so if one fails, all
618 			 * from here will fail also:
619 			 */
620 			DBG("fence not passed: offset %x sz %x %d %d \n",
621 			    block->mem->ofs, block->mem->size, block->fence,
622 			    bufmgr_fake->last_fence);
623 			break;
624 		}
625 	}
626 
627 	DBG("%s: %d\n", __func__, ret);
628 	return ret;
629 }
630 
631 static void
fence_blocks(drm_intel_bufmgr_fake * bufmgr_fake,unsigned fence)632 fence_blocks(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
633 {
634 	struct block *block, *tmp;
635 
636 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
637 		DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n",
638 		    block, block->mem->size, block->mem->ofs, block->bo, fence);
639 		block->fence = fence;
640 
641 		block->on_hardware = 0;
642 		block->fenced = 1;
643 
644 		/* Move to tail of pending list here
645 		 */
646 		DRMLISTDEL(block);
647 		DRMLISTADDTAIL(block, &bufmgr_fake->fenced);
648 	}
649 
650 	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
651 }
652 
653 static int
evict_and_alloc_block(drm_intel_bo * bo)654 evict_and_alloc_block(drm_intel_bo *bo)
655 {
656 	drm_intel_bufmgr_fake *bufmgr_fake =
657 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
658 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
659 
660 	assert(bo_fake->block == NULL);
661 
662 	/* Search for already free memory:
663 	 */
664 	if (alloc_block(bo))
665 		return 1;
666 
667 	/* If we're not thrashing, allow lru eviction to dig deeper into
668 	 * recently used textures.  We'll probably be thrashing soon:
669 	 */
670 	if (!bufmgr_fake->thrashing) {
671 		while (evict_lru(bufmgr_fake, 0))
672 			if (alloc_block(bo))
673 				return 1;
674 	}
675 
676 	/* Keep thrashing counter alive?
677 	 */
678 	if (bufmgr_fake->thrashing)
679 		bufmgr_fake->thrashing = 20;
680 
681 	/* Wait on any already pending fences - here we are waiting for any
682 	 * freed memory that has been submitted to hardware and fenced to
683 	 * become available:
684 	 */
685 	while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
686 		uint32_t fence = bufmgr_fake->fenced.next->fence;
687 		_fence_wait_internal(bufmgr_fake, fence);
688 
689 		if (alloc_block(bo))
690 			return 1;
691 	}
692 
693 	if (!DRMLISTEMPTY(&bufmgr_fake->on_hardware)) {
694 		while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
695 			uint32_t fence = bufmgr_fake->fenced.next->fence;
696 			_fence_wait_internal(bufmgr_fake, fence);
697 		}
698 
699 		if (!bufmgr_fake->thrashing) {
700 			DBG("thrashing\n");
701 		}
702 		bufmgr_fake->thrashing = 20;
703 
704 		if (alloc_block(bo))
705 			return 1;
706 	}
707 
708 	while (evict_mru(bufmgr_fake))
709 		if (alloc_block(bo))
710 			return 1;
711 
712 	DBG("%s 0x%lx bytes failed\n", __func__, bo->size);
713 
714 	return 0;
715 }
716 
717 /***********************************************************************
718  * Public functions
719  */
720 
721 /**
722  * Wait for hardware idle by emitting a fence and waiting for it.
723  */
724 static void
drm_intel_bufmgr_fake_wait_idle(drm_intel_bufmgr_fake * bufmgr_fake)725 drm_intel_bufmgr_fake_wait_idle(drm_intel_bufmgr_fake *bufmgr_fake)
726 {
727 	unsigned int cookie;
728 
729 	cookie = _fence_emit_internal(bufmgr_fake);
730 	_fence_wait_internal(bufmgr_fake, cookie);
731 }
732 
733 /**
734  * Wait for rendering to a buffer to complete.
735  *
736  * It is assumed that the batchbuffer which performed the rendering included
737  * the necessary flushing.
738  */
739 static void
drm_intel_fake_bo_wait_rendering_locked(drm_intel_bo * bo)740 drm_intel_fake_bo_wait_rendering_locked(drm_intel_bo *bo)
741 {
742 	drm_intel_bufmgr_fake *bufmgr_fake =
743 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
744 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
745 
746 	if (bo_fake->block == NULL || !bo_fake->block->fenced)
747 		return;
748 
749 	_fence_wait_internal(bufmgr_fake, bo_fake->block->fence);
750 }
751 
752 static void
drm_intel_fake_bo_wait_rendering(drm_intel_bo * bo)753 drm_intel_fake_bo_wait_rendering(drm_intel_bo *bo)
754 {
755 	drm_intel_bufmgr_fake *bufmgr_fake =
756 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
757 
758 	pthread_mutex_lock(&bufmgr_fake->lock);
759 	drm_intel_fake_bo_wait_rendering_locked(bo);
760 	pthread_mutex_unlock(&bufmgr_fake->lock);
761 }
762 
763 /* Specifically ignore texture memory sharing.
764  *  -- just evict everything
765  *  -- and wait for idle
766  */
767 drm_public void
drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr * bufmgr)768 drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr *bufmgr)
769 {
770 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
771 	struct block *block, *tmp;
772 
773 	pthread_mutex_lock(&bufmgr_fake->lock);
774 
775 	bufmgr_fake->need_fence = 1;
776 	bufmgr_fake->fail = 0;
777 
778 	/* Wait for hardware idle.  We don't know where acceleration has been
779 	 * happening, so we'll need to wait anyway before letting anything get
780 	 * put on the card again.
781 	 */
782 	drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
783 
784 	/* Check that we hadn't released the lock without having fenced the last
785 	 * set of buffers.
786 	 */
787 	assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
788 	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
789 
790 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
791 		assert(_fence_test(bufmgr_fake, block->fence));
792 		set_dirty(block->bo);
793 	}
794 
795 	pthread_mutex_unlock(&bufmgr_fake->lock);
796 }
797 
798 static drm_intel_bo *
drm_intel_fake_bo_alloc(drm_intel_bufmgr * bufmgr,const char * name,unsigned long size,unsigned int alignment)799 drm_intel_fake_bo_alloc(drm_intel_bufmgr *bufmgr,
800 			const char *name,
801 			unsigned long size,
802 			unsigned int alignment)
803 {
804 	drm_intel_bufmgr_fake *bufmgr_fake;
805 	drm_intel_bo_fake *bo_fake;
806 
807 	bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
808 
809 	assert(size != 0);
810 
811 	bo_fake = calloc(1, sizeof(*bo_fake));
812 	if (!bo_fake)
813 		return NULL;
814 
815 	bo_fake->bo.size = size;
816 	bo_fake->bo.offset = -1;
817 	bo_fake->bo.virtual = NULL;
818 	bo_fake->bo.bufmgr = bufmgr;
819 	bo_fake->refcount = 1;
820 
821 	/* Alignment must be a power of two */
822 	assert((alignment & (alignment - 1)) == 0);
823 	if (alignment == 0)
824 		alignment = 1;
825 	bo_fake->alignment = alignment;
826 	bo_fake->id = ++bufmgr_fake->buf_nr;
827 	bo_fake->name = name;
828 	bo_fake->flags = 0;
829 	bo_fake->is_static = 0;
830 
831 	DBG("drm_bo_alloc: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
832 	    bo_fake->bo.size / 1024);
833 
834 	return &bo_fake->bo;
835 }
836 
837 static drm_intel_bo *
drm_intel_fake_bo_alloc_tiled(drm_intel_bufmgr * bufmgr,const char * name,int x,int y,int cpp,uint32_t * tiling_mode,unsigned long * pitch,unsigned long flags)838 drm_intel_fake_bo_alloc_tiled(drm_intel_bufmgr * bufmgr,
839 			      const char *name,
840 			      int x, int y, int cpp,
841 			      uint32_t *tiling_mode,
842 			      unsigned long *pitch,
843 			      unsigned long flags)
844 {
845 	unsigned long stride, aligned_y;
846 
847 	/* No runtime tiling support for fake. */
848 	*tiling_mode = I915_TILING_NONE;
849 
850 	/* Align it for being a render target.  Shouldn't need anything else. */
851 	stride = x * cpp;
852 	stride = ROUND_UP_TO(stride, 64);
853 
854 	/* 965 subspan loading alignment */
855 	aligned_y = ALIGN(y, 2);
856 
857 	*pitch = stride;
858 
859 	return drm_intel_fake_bo_alloc(bufmgr, name, stride * aligned_y,
860 				       4096);
861 }
862 
863 drm_public drm_intel_bo *
drm_intel_bo_fake_alloc_static(drm_intel_bufmgr * bufmgr,const char * name,unsigned long offset,unsigned long size,void * virtual)864 drm_intel_bo_fake_alloc_static(drm_intel_bufmgr *bufmgr,
865 			       const char *name,
866 			       unsigned long offset,
867 			       unsigned long size, void *virtual)
868 {
869 	drm_intel_bufmgr_fake *bufmgr_fake;
870 	drm_intel_bo_fake *bo_fake;
871 
872 	bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
873 
874 	assert(size != 0);
875 
876 	bo_fake = calloc(1, sizeof(*bo_fake));
877 	if (!bo_fake)
878 		return NULL;
879 
880 	bo_fake->bo.size = size;
881 	bo_fake->bo.offset = offset;
882 	bo_fake->bo.virtual = virtual;
883 	bo_fake->bo.bufmgr = bufmgr;
884 	bo_fake->refcount = 1;
885 	bo_fake->id = ++bufmgr_fake->buf_nr;
886 	bo_fake->name = name;
887 	bo_fake->flags = BM_PINNED;
888 	bo_fake->is_static = 1;
889 
890 	DBG("drm_bo_alloc_static: (buf %d: %s, %lu kb)\n", bo_fake->id,
891 	    bo_fake->name, bo_fake->bo.size / 1024);
892 
893 	return &bo_fake->bo;
894 }
895 
896 static void
drm_intel_fake_bo_reference(drm_intel_bo * bo)897 drm_intel_fake_bo_reference(drm_intel_bo *bo)
898 {
899 	drm_intel_bufmgr_fake *bufmgr_fake =
900 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
901 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
902 
903 	pthread_mutex_lock(&bufmgr_fake->lock);
904 	bo_fake->refcount++;
905 	pthread_mutex_unlock(&bufmgr_fake->lock);
906 }
907 
908 static void
drm_intel_fake_bo_reference_locked(drm_intel_bo * bo)909 drm_intel_fake_bo_reference_locked(drm_intel_bo *bo)
910 {
911 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
912 
913 	bo_fake->refcount++;
914 }
915 
916 static void
drm_intel_fake_bo_unreference_locked(drm_intel_bo * bo)917 drm_intel_fake_bo_unreference_locked(drm_intel_bo *bo)
918 {
919 	drm_intel_bufmgr_fake *bufmgr_fake =
920 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
921 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
922 	int i;
923 
924 	if (--bo_fake->refcount == 0) {
925 		assert(bo_fake->map_count == 0);
926 		/* No remaining references, so free it */
927 		if (bo_fake->block)
928 			free_block(bufmgr_fake, bo_fake->block, 1);
929 		free_backing_store(bo);
930 
931 		for (i = 0; i < bo_fake->nr_relocs; i++)
932 			drm_intel_fake_bo_unreference_locked(bo_fake->relocs[i].
933 							     target_buf);
934 
935 		DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id,
936 		    bo_fake->name);
937 
938 		free(bo_fake->relocs);
939 		free(bo);
940 	}
941 }
942 
943 static void
drm_intel_fake_bo_unreference(drm_intel_bo * bo)944 drm_intel_fake_bo_unreference(drm_intel_bo *bo)
945 {
946 	drm_intel_bufmgr_fake *bufmgr_fake =
947 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
948 
949 	pthread_mutex_lock(&bufmgr_fake->lock);
950 	drm_intel_fake_bo_unreference_locked(bo);
951 	pthread_mutex_unlock(&bufmgr_fake->lock);
952 }
953 
954 /**
955  * Set the buffer as not requiring backing store, and instead get the callback
956  * invoked whenever it would be set dirty.
957  */
958 drm_public void
drm_intel_bo_fake_disable_backing_store(drm_intel_bo * bo,void (* invalidate_cb)(drm_intel_bo * bo,void * ptr),void * ptr)959 drm_intel_bo_fake_disable_backing_store(drm_intel_bo *bo,
960 					void (*invalidate_cb) (drm_intel_bo *bo,
961 							       void *ptr),
962 					void *ptr)
963 {
964 	drm_intel_bufmgr_fake *bufmgr_fake =
965 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
966 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
967 
968 	pthread_mutex_lock(&bufmgr_fake->lock);
969 
970 	if (bo_fake->backing_store)
971 		free_backing_store(bo);
972 
973 	bo_fake->flags |= BM_NO_BACKING_STORE;
974 
975 	DBG("disable_backing_store set buf %d dirty\n", bo_fake->id);
976 	bo_fake->dirty = 1;
977 	bo_fake->invalidate_cb = invalidate_cb;
978 	bo_fake->invalidate_ptr = ptr;
979 
980 	/* Note that it is invalid right from the start.  Also note
981 	 * invalidate_cb is called with the bufmgr locked, so cannot
982 	 * itself make bufmgr calls.
983 	 */
984 	if (invalidate_cb != NULL)
985 		invalidate_cb(bo, ptr);
986 
987 	pthread_mutex_unlock(&bufmgr_fake->lock);
988 }
989 
990 /**
991  * Map a buffer into bo->virtual, allocating either card memory space (If
992  * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary.
993  */
994 static int
drm_intel_fake_bo_map_locked(drm_intel_bo * bo,int write_enable)995  drm_intel_fake_bo_map_locked(drm_intel_bo *bo, int write_enable)
996 {
997 	drm_intel_bufmgr_fake *bufmgr_fake =
998 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
999 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1000 
1001 	/* Static buffers are always mapped. */
1002 	if (bo_fake->is_static) {
1003 		if (bo_fake->card_dirty) {
1004 			drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1005 			bo_fake->card_dirty = 0;
1006 		}
1007 		return 0;
1008 	}
1009 
1010 	/* Allow recursive mapping.  Mesa may recursively map buffers with
1011 	 * nested display loops, and it is used internally in bufmgr_fake
1012 	 * for relocation.
1013 	 */
1014 	if (bo_fake->map_count++ != 0)
1015 		return 0;
1016 
1017 	{
1018 		DBG("drm_bo_map: (buf %d: %s, %lu kb)\n", bo_fake->id,
1019 		    bo_fake->name, bo_fake->bo.size / 1024);
1020 
1021 		if (bo->virtual != NULL) {
1022 			drmMsg("%s: already mapped\n", __func__);
1023 			abort();
1024 		} else if (bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)) {
1025 
1026 			if (!bo_fake->block && !evict_and_alloc_block(bo)) {
1027 				DBG("%s: alloc failed\n", __func__);
1028 				bufmgr_fake->fail = 1;
1029 				return 1;
1030 			} else {
1031 				assert(bo_fake->block);
1032 				bo_fake->dirty = 0;
1033 
1034 				if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) &&
1035 				    bo_fake->block->fenced) {
1036 					drm_intel_fake_bo_wait_rendering_locked
1037 					    (bo);
1038 				}
1039 
1040 				bo->virtual = bo_fake->block->virtual;
1041 			}
1042 		} else {
1043 			if (write_enable)
1044 				set_dirty(bo);
1045 
1046 			if (bo_fake->backing_store == 0)
1047 				alloc_backing_store(bo);
1048 
1049 			if ((bo_fake->card_dirty == 1) && bo_fake->block) {
1050 				if (bo_fake->block->fenced)
1051 					drm_intel_fake_bo_wait_rendering_locked
1052 					    (bo);
1053 
1054 				memcpy(bo_fake->backing_store,
1055 				       bo_fake->block->virtual,
1056 				       bo_fake->block->bo->size);
1057 				bo_fake->card_dirty = 0;
1058 			}
1059 
1060 			bo->virtual = bo_fake->backing_store;
1061 		}
1062 	}
1063 
1064 	return 0;
1065 }
1066 
1067 static int
drm_intel_fake_bo_map(drm_intel_bo * bo,int write_enable)1068  drm_intel_fake_bo_map(drm_intel_bo *bo, int write_enable)
1069 {
1070 	drm_intel_bufmgr_fake *bufmgr_fake =
1071 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1072 	int ret;
1073 
1074 	pthread_mutex_lock(&bufmgr_fake->lock);
1075 	ret = drm_intel_fake_bo_map_locked(bo, write_enable);
1076 	pthread_mutex_unlock(&bufmgr_fake->lock);
1077 
1078 	return ret;
1079 }
1080 
1081 static int
drm_intel_fake_bo_unmap_locked(drm_intel_bo * bo)1082  drm_intel_fake_bo_unmap_locked(drm_intel_bo *bo)
1083 {
1084 	drm_intel_bufmgr_fake *bufmgr_fake =
1085 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1086 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1087 
1088 	/* Static buffers are always mapped. */
1089 	if (bo_fake->is_static)
1090 		return 0;
1091 
1092 	assert(bo_fake->map_count != 0);
1093 	if (--bo_fake->map_count != 0)
1094 		return 0;
1095 
1096 	DBG("drm_bo_unmap: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
1097 	    bo_fake->bo.size / 1024);
1098 
1099 	bo->virtual = NULL;
1100 
1101 	return 0;
1102 }
1103 
/**
 * bo_unmap hook: runs drm_intel_fake_bo_unmap_locked() under the buffer
 * manager lock and hands its result back to the caller.
 */
static int
drm_intel_fake_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *mgr = (drm_intel_bufmgr_fake *) bo->bufmgr;
	int status;

	pthread_mutex_lock(&mgr->lock);
	status = drm_intel_fake_bo_unmap_locked(bo);
	pthread_mutex_unlock(&mgr->lock);

	return status;
}
1116 
1117 static int
drm_intel_fake_bo_subdata(drm_intel_bo * bo,unsigned long offset,unsigned long size,const void * data)1118 drm_intel_fake_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1119 			  unsigned long size, const void *data)
1120 {
1121 	int ret;
1122 
1123 	if (size == 0 || data == NULL)
1124 		return 0;
1125 
1126 	ret = drm_intel_bo_map(bo, 1);
1127 	if (ret)
1128 		return ret;
1129 	memcpy((unsigned char *)bo->virtual + offset, data, size);
1130 	drm_intel_bo_unmap(bo);
1131 	return 0;
1132 }
1133 
1134 static void
drm_intel_fake_kick_all_locked(drm_intel_bufmgr_fake * bufmgr_fake)1135  drm_intel_fake_kick_all_locked(drm_intel_bufmgr_fake *bufmgr_fake)
1136 {
1137 	struct block *block, *tmp;
1138 
1139 	bufmgr_fake->performed_rendering = 0;
1140 	/* okay for ever BO that is on the HW kick it off.
1141 	   seriously not afraid of the POLICE right now */
1142 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
1143 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
1144 
1145 		block->on_hardware = 0;
1146 		free_block(bufmgr_fake, block, 0);
1147 		bo_fake->block = NULL;
1148 		bo_fake->validated = 0;
1149 		if (!(bo_fake->flags & BM_NO_BACKING_STORE))
1150 			bo_fake->dirty = 1;
1151 	}
1152 
1153 }
1154 
1155 static int
drm_intel_fake_bo_validate(drm_intel_bo * bo)1156  drm_intel_fake_bo_validate(drm_intel_bo *bo)
1157 {
1158 	drm_intel_bufmgr_fake *bufmgr_fake;
1159 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1160 
1161 	bufmgr_fake = (drm_intel_bufmgr_fake *) bo->bufmgr;
1162 
1163 	DBG("drm_bo_validate: (buf %d: %s, %lu kb)\n", bo_fake->id,
1164 	    bo_fake->name, bo_fake->bo.size / 1024);
1165 
1166 	/* Sanity check: Buffers should be unmapped before being validated.
1167 	 * This is not so much of a problem for bufmgr_fake, but TTM refuses,
1168 	 * and the problem is harder to debug there.
1169 	 */
1170 	assert(bo_fake->map_count == 0);
1171 
1172 	if (bo_fake->is_static) {
1173 		/* Add it to the needs-fence list */
1174 		bufmgr_fake->need_fence = 1;
1175 		return 0;
1176 	}
1177 
1178 	/* Allocate the card memory */
1179 	if (!bo_fake->block && !evict_and_alloc_block(bo)) {
1180 		bufmgr_fake->fail = 1;
1181 		DBG("Failed to validate buf %d:%s\n", bo_fake->id,
1182 		    bo_fake->name);
1183 		return -1;
1184 	}
1185 
1186 	assert(bo_fake->block);
1187 	assert(bo_fake->block->bo == &bo_fake->bo);
1188 
1189 	bo->offset = bo_fake->block->mem->ofs;
1190 
1191 	/* Upload the buffer contents if necessary */
1192 	if (bo_fake->dirty) {
1193 		DBG("Upload dirty buf %d:%s, sz %lu offset 0x%x\n", bo_fake->id,
1194 		    bo_fake->name, bo->size, bo_fake->block->mem->ofs);
1195 
1196 		assert(!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)));
1197 
1198 		/* Actually, should be able to just wait for a fence on the
1199 		 * memory, which we would be tracking when we free it. Waiting
1200 		 * for idle is a sufficiently large hammer for now.
1201 		 */
1202 		drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1203 
1204 		/* we may never have mapped this BO so it might not have any
1205 		 * backing store if this happens it should be rare, but 0 the
1206 		 * card memory in any case */
1207 		if (bo_fake->backing_store)
1208 			memcpy(bo_fake->block->virtual, bo_fake->backing_store,
1209 			       bo->size);
1210 		else
1211 			memset(bo_fake->block->virtual, 0, bo->size);
1212 
1213 		bo_fake->dirty = 0;
1214 	}
1215 
1216 	bo_fake->block->fenced = 0;
1217 	bo_fake->block->on_hardware = 1;
1218 	DRMLISTDEL(bo_fake->block);
1219 	DRMLISTADDTAIL(bo_fake->block, &bufmgr_fake->on_hardware);
1220 
1221 	bo_fake->validated = 1;
1222 	bufmgr_fake->need_fence = 1;
1223 
1224 	return 0;
1225 }
1226 
1227 static void
drm_intel_fake_fence_validated(drm_intel_bufmgr * bufmgr)1228 drm_intel_fake_fence_validated(drm_intel_bufmgr *bufmgr)
1229 {
1230 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1231 	unsigned int cookie;
1232 
1233 	cookie = _fence_emit_internal(bufmgr_fake);
1234 	fence_blocks(bufmgr_fake, cookie);
1235 
1236 	DBG("drm_fence_validated: 0x%08x cookie\n", cookie);
1237 }
1238 
1239 static void
drm_intel_fake_destroy(drm_intel_bufmgr * bufmgr)1240 drm_intel_fake_destroy(drm_intel_bufmgr *bufmgr)
1241 {
1242 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1243 
1244 	pthread_mutex_destroy(&bufmgr_fake->lock);
1245 	mmDestroy(bufmgr_fake->heap);
1246 	free(bufmgr);
1247 }
1248 
1249 static int
drm_intel_fake_emit_reloc(drm_intel_bo * bo,uint32_t offset,drm_intel_bo * target_bo,uint32_t target_offset,uint32_t read_domains,uint32_t write_domain)1250 drm_intel_fake_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1251 			  drm_intel_bo *target_bo, uint32_t target_offset,
1252 			  uint32_t read_domains, uint32_t write_domain)
1253 {
1254 	drm_intel_bufmgr_fake *bufmgr_fake =
1255 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1256 	struct fake_buffer_reloc *r;
1257 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1258 	drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *) target_bo;
1259 	int i;
1260 
1261 	pthread_mutex_lock(&bufmgr_fake->lock);
1262 
1263 	assert(bo);
1264 	assert(target_bo);
1265 
1266 	if (bo_fake->relocs == NULL) {
1267 		bo_fake->relocs =
1268 		    malloc(sizeof(struct fake_buffer_reloc) * MAX_RELOCS);
1269 	}
1270 
1271 	r = &bo_fake->relocs[bo_fake->nr_relocs++];
1272 
1273 	assert(bo_fake->nr_relocs <= MAX_RELOCS);
1274 
1275 	drm_intel_fake_bo_reference_locked(target_bo);
1276 
1277 	if (!target_fake->is_static) {
1278 		bo_fake->child_size +=
1279 		    ALIGN(target_bo->size, target_fake->alignment);
1280 		bo_fake->child_size += target_fake->child_size;
1281 	}
1282 	r->target_buf = target_bo;
1283 	r->offset = offset;
1284 	r->last_target_offset = target_bo->offset;
1285 	r->delta = target_offset;
1286 	r->read_domains = read_domains;
1287 	r->write_domain = write_domain;
1288 
1289 	if (bufmgr_fake->debug) {
1290 		/* Check that a conflicting relocation hasn't already been
1291 		 * emitted.
1292 		 */
1293 		for (i = 0; i < bo_fake->nr_relocs - 1; i++) {
1294 			struct fake_buffer_reloc *r2 = &bo_fake->relocs[i];
1295 
1296 			assert(r->offset != r2->offset);
1297 		}
1298 	}
1299 
1300 	pthread_mutex_unlock(&bufmgr_fake->lock);
1301 
1302 	return 0;
1303 }
1304 
1305 /**
1306  * Incorporates the validation flags associated with each relocation into
1307  * the combined validation flags for the buffer on this batchbuffer submission.
1308  */
1309 static void
drm_intel_fake_calculate_domains(drm_intel_bo * bo)1310 drm_intel_fake_calculate_domains(drm_intel_bo *bo)
1311 {
1312 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1313 	int i;
1314 
1315 	for (i = 0; i < bo_fake->nr_relocs; i++) {
1316 		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1317 		drm_intel_bo_fake *target_fake =
1318 		    (drm_intel_bo_fake *) r->target_buf;
1319 
1320 		/* Do the same for the tree of buffers we depend on */
1321 		drm_intel_fake_calculate_domains(r->target_buf);
1322 
1323 		target_fake->read_domains |= r->read_domains;
1324 		target_fake->write_domain |= r->write_domain;
1325 	}
1326 }
1327 
1328 static int
drm_intel_fake_reloc_and_validate_buffer(drm_intel_bo * bo)1329 drm_intel_fake_reloc_and_validate_buffer(drm_intel_bo *bo)
1330 {
1331 	drm_intel_bufmgr_fake *bufmgr_fake =
1332 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1333 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1334 	int i, ret;
1335 
1336 	assert(bo_fake->map_count == 0);
1337 
1338 	for (i = 0; i < bo_fake->nr_relocs; i++) {
1339 		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1340 		drm_intel_bo_fake *target_fake =
1341 		    (drm_intel_bo_fake *) r->target_buf;
1342 		uint32_t reloc_data;
1343 
1344 		/* Validate the target buffer if that hasn't been done. */
1345 		if (!target_fake->validated) {
1346 			ret =
1347 			    drm_intel_fake_reloc_and_validate_buffer(r->target_buf);
1348 			if (ret != 0) {
1349 				if (bo->virtual != NULL)
1350 					drm_intel_fake_bo_unmap_locked(bo);
1351 				return ret;
1352 			}
1353 		}
1354 
1355 		/* Calculate the value of the relocation entry. */
1356 		if (r->target_buf->offset != r->last_target_offset) {
1357 			reloc_data = r->target_buf->offset + r->delta;
1358 
1359 			if (bo->virtual == NULL)
1360 				drm_intel_fake_bo_map_locked(bo, 1);
1361 
1362 			*(uint32_t *) ((uint8_t *) bo->virtual + r->offset) =
1363 			    reloc_data;
1364 
1365 			r->last_target_offset = r->target_buf->offset;
1366 		}
1367 	}
1368 
1369 	if (bo->virtual != NULL)
1370 		drm_intel_fake_bo_unmap_locked(bo);
1371 
1372 	if (bo_fake->write_domain != 0) {
1373 		if (!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED))) {
1374 			if (bo_fake->backing_store == 0)
1375 				alloc_backing_store(bo);
1376 		}
1377 		bo_fake->card_dirty = 1;
1378 		bufmgr_fake->performed_rendering = 1;
1379 	}
1380 
1381 	return drm_intel_fake_bo_validate(bo);
1382 }
1383 
1384 static void
drm_intel_bo_fake_post_submit(drm_intel_bo * bo)1385 drm_intel_bo_fake_post_submit(drm_intel_bo *bo)
1386 {
1387 	drm_intel_bufmgr_fake *bufmgr_fake =
1388 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1389 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1390 	int i;
1391 
1392 	for (i = 0; i < bo_fake->nr_relocs; i++) {
1393 		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1394 		drm_intel_bo_fake *target_fake =
1395 		    (drm_intel_bo_fake *) r->target_buf;
1396 
1397 		if (target_fake->validated)
1398 			drm_intel_bo_fake_post_submit(r->target_buf);
1399 
1400 		DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n",
1401 		    bo_fake->name, (uint32_t) bo->offset, r->offset,
1402 		    target_fake->name, (uint32_t) r->target_buf->offset,
1403 		    r->delta);
1404 	}
1405 
1406 	assert(bo_fake->map_count == 0);
1407 	bo_fake->validated = 0;
1408 	bo_fake->read_domains = 0;
1409 	bo_fake->write_domain = 0;
1410 }
1411 
1412 drm_public void
drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr * bufmgr,int (* exec)(drm_intel_bo * bo,unsigned int used,void * priv),void * priv)1413 drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr *bufmgr,
1414 					     int (*exec) (drm_intel_bo *bo,
1415 							  unsigned int used,
1416 							  void *priv),
1417 					     void *priv)
1418 {
1419 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1420 
1421 	bufmgr_fake->exec = exec;
1422 	bufmgr_fake->exec_priv = priv;
1423 }
1424 
1425 static int
drm_intel_fake_bo_exec(drm_intel_bo * bo,int used,drm_clip_rect_t * cliprects,int num_cliprects,int DR4)1426 drm_intel_fake_bo_exec(drm_intel_bo *bo, int used,
1427 		       drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
1428 {
1429 	drm_intel_bufmgr_fake *bufmgr_fake =
1430 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1431 	drm_intel_bo_fake *batch_fake = (drm_intel_bo_fake *) bo;
1432 	struct drm_i915_batchbuffer batch;
1433 	int ret;
1434 	int retry_count = 0;
1435 
1436 	pthread_mutex_lock(&bufmgr_fake->lock);
1437 
1438 	bufmgr_fake->performed_rendering = 0;
1439 
1440 	drm_intel_fake_calculate_domains(bo);
1441 
1442 	batch_fake->read_domains = I915_GEM_DOMAIN_COMMAND;
1443 
1444 	/* we've ran out of RAM so blow the whole lot away and retry */
1445 restart:
1446 	ret = drm_intel_fake_reloc_and_validate_buffer(bo);
1447 	if (bufmgr_fake->fail == 1) {
1448 		if (retry_count == 0) {
1449 			retry_count++;
1450 			drm_intel_fake_kick_all_locked(bufmgr_fake);
1451 			bufmgr_fake->fail = 0;
1452 			goto restart;
1453 		} else		/* dump out the memory here */
1454 			mmDumpMemInfo(bufmgr_fake->heap);
1455 	}
1456 
1457 	assert(ret == 0);
1458 
1459 	if (bufmgr_fake->exec != NULL) {
1460 		ret = bufmgr_fake->exec(bo, used, bufmgr_fake->exec_priv);
1461 		if (ret != 0) {
1462 			pthread_mutex_unlock(&bufmgr_fake->lock);
1463 			return ret;
1464 		}
1465 	} else {
1466 		batch.start = bo->offset;
1467 		batch.used = used;
1468 		batch.cliprects = cliprects;
1469 		batch.num_cliprects = num_cliprects;
1470 		batch.DR1 = 0;
1471 		batch.DR4 = DR4;
1472 
1473 		if (drmCommandWrite
1474 		    (bufmgr_fake->fd, DRM_I915_BATCHBUFFER, &batch,
1475 		     sizeof(batch))) {
1476 			drmMsg("DRM_I915_BATCHBUFFER: %d\n", -errno);
1477 			pthread_mutex_unlock(&bufmgr_fake->lock);
1478 			return -errno;
1479 		}
1480 	}
1481 
1482 	drm_intel_fake_fence_validated(bo->bufmgr);
1483 
1484 	drm_intel_bo_fake_post_submit(bo);
1485 
1486 	pthread_mutex_unlock(&bufmgr_fake->lock);
1487 
1488 	return 0;
1489 }
1490 
1491 /**
1492  * Return an error if the list of BOs will exceed the aperture size.
1493  *
1494  * This is a rough guess and likely to fail, as during the validate sequence we
1495  * may place a buffer in an inopportune spot early on and then fail to fit
1496  * a set smaller than the aperture.
1497  */
1498 static int
drm_intel_fake_check_aperture_space(drm_intel_bo ** bo_array,int count)1499 drm_intel_fake_check_aperture_space(drm_intel_bo ** bo_array, int count)
1500 {
1501 	drm_intel_bufmgr_fake *bufmgr_fake =
1502 	    (drm_intel_bufmgr_fake *) bo_array[0]->bufmgr;
1503 	unsigned int sz = 0;
1504 	int i;
1505 
1506 	for (i = 0; i < count; i++) {
1507 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo_array[i];
1508 
1509 		if (bo_fake == NULL)
1510 			continue;
1511 
1512 		if (!bo_fake->is_static)
1513 			sz += ALIGN(bo_array[i]->size, bo_fake->alignment);
1514 		sz += bo_fake->child_size;
1515 	}
1516 
1517 	if (sz > bufmgr_fake->size) {
1518 		DBG("check_space: overflowed bufmgr size, %ukb vs %lukb\n",
1519 		    sz / 1024, bufmgr_fake->size / 1024);
1520 		return -1;
1521 	}
1522 
1523 	DBG("drm_check_space: sz %ukb vs bufgr %lukb\n", sz / 1024,
1524 	    bufmgr_fake->size / 1024);
1525 	return 0;
1526 }
1527 
1528 /**
1529  * Evicts all buffers, waiting for fences to pass and copying contents out
1530  * as necessary.
1531  *
1532  * Used by the X Server on LeaveVT, when the card memory is no longer our
1533  * own.
1534  */
1535 drm_public void
drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr * bufmgr)1536 drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr *bufmgr)
1537 {
1538 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1539 	struct block *block, *tmp;
1540 
1541 	pthread_mutex_lock(&bufmgr_fake->lock);
1542 
1543 	bufmgr_fake->need_fence = 1;
1544 	bufmgr_fake->fail = 0;
1545 
1546 	/* Wait for hardware idle.  We don't know where acceleration has been
1547 	 * happening, so we'll need to wait anyway before letting anything get
1548 	 * put on the card again.
1549 	 */
1550 	drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1551 
1552 	/* Check that we hadn't released the lock without having fenced the last
1553 	 * set of buffers.
1554 	 */
1555 	assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
1556 	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
1557 
1558 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
1559 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
1560 		/* Releases the memory, and memcpys dirty contents out if
1561 		 * necessary.
1562 		 */
1563 		free_block(bufmgr_fake, block, 0);
1564 		bo_fake->block = NULL;
1565 	}
1566 
1567 	pthread_mutex_unlock(&bufmgr_fake->lock);
1568 }
1569 
1570 drm_public void
drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr * bufmgr,volatile unsigned int * last_dispatch)1571 drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr *bufmgr,
1572 					volatile unsigned int
1573 					*last_dispatch)
1574 {
1575 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1576 
1577 	bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
1578 }
1579 
1580 drm_public drm_intel_bufmgr *
drm_intel_bufmgr_fake_init(int fd,unsigned long low_offset,void * low_virtual,unsigned long size,volatile unsigned int * last_dispatch)1581 drm_intel_bufmgr_fake_init(int fd, unsigned long low_offset,
1582 			   void *low_virtual, unsigned long size,
1583 			   volatile unsigned int *last_dispatch)
1584 {
1585 	drm_intel_bufmgr_fake *bufmgr_fake;
1586 
1587 	bufmgr_fake = calloc(1, sizeof(*bufmgr_fake));
1588 
1589 	if (pthread_mutex_init(&bufmgr_fake->lock, NULL) != 0) {
1590 		free(bufmgr_fake);
1591 		return NULL;
1592 	}
1593 
1594 	/* Initialize allocator */
1595 	DRMINITLISTHEAD(&bufmgr_fake->fenced);
1596 	DRMINITLISTHEAD(&bufmgr_fake->on_hardware);
1597 	DRMINITLISTHEAD(&bufmgr_fake->lru);
1598 
1599 	bufmgr_fake->low_offset = low_offset;
1600 	bufmgr_fake->virtual = low_virtual;
1601 	bufmgr_fake->size = size;
1602 	bufmgr_fake->heap = mmInit(low_offset, size);
1603 
1604 	/* Hook in methods */
1605 	bufmgr_fake->bufmgr.bo_alloc = drm_intel_fake_bo_alloc;
1606 	bufmgr_fake->bufmgr.bo_alloc_for_render = drm_intel_fake_bo_alloc;
1607 	bufmgr_fake->bufmgr.bo_alloc_tiled = drm_intel_fake_bo_alloc_tiled;
1608 	bufmgr_fake->bufmgr.bo_reference = drm_intel_fake_bo_reference;
1609 	bufmgr_fake->bufmgr.bo_unreference = drm_intel_fake_bo_unreference;
1610 	bufmgr_fake->bufmgr.bo_map = drm_intel_fake_bo_map;
1611 	bufmgr_fake->bufmgr.bo_unmap = drm_intel_fake_bo_unmap;
1612 	bufmgr_fake->bufmgr.bo_subdata = drm_intel_fake_bo_subdata;
1613 	bufmgr_fake->bufmgr.bo_wait_rendering =
1614 	    drm_intel_fake_bo_wait_rendering;
1615 	bufmgr_fake->bufmgr.bo_emit_reloc = drm_intel_fake_emit_reloc;
1616 	bufmgr_fake->bufmgr.destroy = drm_intel_fake_destroy;
1617 	bufmgr_fake->bufmgr.bo_exec = drm_intel_fake_bo_exec;
1618 	bufmgr_fake->bufmgr.check_aperture_space =
1619 	    drm_intel_fake_check_aperture_space;
1620 	bufmgr_fake->bufmgr.debug = 0;
1621 
1622 	bufmgr_fake->fd = fd;
1623 	bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
1624 
1625 	return &bufmgr_fake->bufmgr;
1626 }
1627