1 /**************************************************************************
2  *
3  * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /* Originally a fake version of the buffer manager so that we could
29  * prototype changes in a driver fairly quickly; it has since been
30  * fleshed out into a fully functional interim solution.
31  *
32  * Basically wraps the old style memory management in the new
33  * programming interface, but is more expressive and avoids many of
34  * the bugs in the old texture manager.
35  */
36 
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40 
41 #include <stdlib.h>
42 #include <string.h>
43 #include <assert.h>
44 #include <errno.h>
45 #include <xf86drm.h>
46 #include <pthread.h>
47 #include "intel_bufmgr.h"
48 #include "intel_bufmgr_priv.h"
49 #include "drm.h"
50 #include "i915_drm.h"
51 #include "mm.h"
52 #include "libdrm.h"
53 #include "libdrm_lists.h"
54 
55 /* Support gcc's __FUNCTION__ for people using other compilers */
56 #if !defined(__GNUC__) && !defined(__FUNCTION__)
57 # define __FUNCTION__ __func__ /* C99 */
58 #endif
59 
60 #define DBG(...) do {					\
61 	if (bufmgr_fake->bufmgr.debug)			\
62 		drmMsg(__VA_ARGS__);			\
63 } while (0)
64 
65 /* Internal flags:
66  */
67 #define BM_NO_BACKING_STORE			0x00000001
68 #define BM_NO_FENCE_SUBDATA			0x00000002
69 #define BM_PINNED				0x00000004
70 
71 /* Wrapper around mm.c's mem_block, which understands that you must
72  * wait for fences to expire before memory can be freed.  This is
73  * specific to our use of memcpy for uploads - an upload that was
74  * processed through the command queue wouldn't need to care about
75  * fences.
76  */
77 #define MAX_RELOCS 4096
78 
79 struct fake_buffer_reloc {
80 	/** Buffer object that the relocation points at. */
81 	drm_intel_bo *target_buf;
82 	/** Offset of the relocation entry within reloc_buf. */
83 	uint32_t offset;
84 	/**
85 	 * Cached value of the offset when we last performed this relocation.
86 	 */
87 	uint32_t last_target_offset;
88 	/** Value added to target_buf's offset to get the relocation entry. */
89 	uint32_t delta;
90 	/** Cache domains the target buffer is read into. */
91 	uint32_t read_domains;
92 	/** Cache domain the target buffer will have dirty cachelines in. */
93 	uint32_t write_domain;
94 };
95 
96 struct block {
97 	struct block *next, *prev;
98 	struct mem_block *mem;	/* BM_MEM_AGP */
99 
100 	/**
101 	 * Marks that the block is currently in the aperture and has yet to be
102 	 * fenced.
103 	 */
104 	unsigned on_hardware:1;
105 	/**
106 	 * Marks that the block is currently fenced (being used by rendering)
107 	 * and can't be freed until @fence is passed.
108 	 */
109 	unsigned fenced:1;
110 
111 	/** Fence cookie for the block. */
112 	unsigned fence;		/* Split to read_fence, write_fence */
113 
114 	drm_intel_bo *bo;
115 	void *virtual;
116 };
117 
118 typedef struct _bufmgr_fake {
119 	drm_intel_bufmgr bufmgr;
120 
121 	pthread_mutex_t lock;
122 
123 	unsigned long low_offset;
124 	unsigned long size;
125 	void *virtual;
126 
127 	struct mem_block *heap;
128 
129 	unsigned buf_nr;	/* for generating ids */
130 
131 	/**
132 	 * List of blocks which are currently in the GART but haven't been
133 	 * fenced yet.
134 	 */
135 	struct block on_hardware;
136 	/**
137 	 * List of blocks which are in the GART and have an active fence on
138 	 * them.
139 	 */
140 	struct block fenced;
141 	/**
142 	 * List of blocks which have an expired fence and are ready to be
143 	 * evicted.
144 	 */
145 	struct block lru;
146 
147 	unsigned int last_fence;
148 
149 	unsigned fail:1;
150 	unsigned need_fence:1;
151 	int thrashing;
152 
153 	/**
154 	 * Driver callback to emit a fence, returning the cookie.
155 	 *
156 	 * This allows the driver to hook in a replacement for the DRM usage in
157 	 * bufmgr_fake.
158 	 *
159 	 * Currently, this also requires that a write flush be emitted before
160 	 * emitting the fence, but this should change.
161 	 */
162 	unsigned int (*fence_emit) (void *private);
163 	/** Driver callback to wait for a fence cookie to have passed. */
164 	void (*fence_wait) (unsigned int fence, void *private);
165 	void *fence_priv;
166 
167 	/**
168 	 * Driver callback to execute a buffer.
169 	 *
170 	 * This allows the driver to hook in a replacement for the DRM usage in
171 	 * bufmgr_fake.
172 	 */
173 	int (*exec) (drm_intel_bo *bo, unsigned int used, void *priv);
174 	void *exec_priv;
175 
176 	/** Driver-supplied argument to driver callbacks */
177 	void *driver_priv;
178 	/**
179 	 * Pointer to kernel-updated sarea data for the last completed user irq
180 	 */
181 	volatile int *last_dispatch;
182 
183 	int fd;
184 
185 	int debug;
186 
187 	int performed_rendering;
188 } drm_intel_bufmgr_fake;
189 
190 typedef struct _drm_intel_bo_fake {
191 	drm_intel_bo bo;
192 
193 	unsigned id;		/* debug only */
194 	const char *name;
195 
196 	unsigned dirty:1;
197 	/**
198 	 * has the card written to this buffer - we may need to copy it back
199 	 */
200 	unsigned card_dirty:1;
201 	unsigned int refcount;
202 	/* Flags may consist of any of the DRM_BO flags, plus
203 	 * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the
204 	 * first two driver private flags.
205 	 */
206 	uint64_t flags;
207 	/** Cache domains the target buffer is read into. */
208 	uint32_t read_domains;
209 	/** Cache domain the target buffer will have dirty cachelines in. */
210 	uint32_t write_domain;
211 
212 	unsigned int alignment;
213 	int is_static, validated;
214 	unsigned int map_count;
215 
216 	/** relocation list */
217 	struct fake_buffer_reloc *relocs;
218 	int nr_relocs;
219 	/**
220 	 * Total size of the target_bos of this buffer.
221 	 *
222 	 * Used for estimation in check_aperture.
223 	 */
224 	unsigned int child_size;
225 
226 	struct block *block;
227 	void *backing_store;
228 	void (*invalidate_cb) (drm_intel_bo *bo, void *ptr);
229 	void *invalidate_ptr;
230 } drm_intel_bo_fake;
231 
232 static int clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake,
233 			unsigned int fence_cookie);
234 
235 #define MAXFENCE 0x7fffffff
236 
237 static int
238 FENCE_LTE(unsigned a, unsigned b)
239 {
240 	if (a == b)
241 		return 1;
242 
243 	if (a < b && b - a < (1 << 24))
244 		return 1;
245 
246 	if (a > b && MAXFENCE - a + b < (1 << 24))
247 		return 1;
248 
249 	return 0;
250 }
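/* A quick worked example of the wrap-around window above (illustrative
 * values only, not taken from real hardware):
 *
 *	FENCE_LTE(0x00000010, 0x00000015) == 1   a < b and b - a = 5 < 2^24
 *	FENCE_LTE(0x7ffffff0, 0x00000010) == 1   a > b, but
 *	                                         MAXFENCE - a + b = 0x1f < 2^24,
 *	                                         so b is treated as newer
 *	FENCE_LTE(0x00000010, 0x7ffffff0) == 0   b is ~2^31 ahead, outside the
 *	                                         window, so it is not "later"
 */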
251 
252 drm_public void
253 drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr *bufmgr,
254 					 unsigned int (*emit) (void *priv),
255 					 void (*wait) (unsigned int fence,
256 						       void *priv),
257 					 void *priv)
258 {
259 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
260 
261 	bufmgr_fake->fence_emit = emit;
262 	bufmgr_fake->fence_wait = wait;
263 	bufmgr_fake->fence_priv = priv;
264 }
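/* A minimal sketch of how a driver might install its own fencing in place of
 * the DRM irq path used below; the emit callback must return the fence
 * cookie.  my_hw, my_hw_emit_breadcrumb and my_hw_wait_breadcrumb are
 * hypothetical driver state and helpers, not part of this file:
 *
 *	static unsigned int my_emit_fence(void *priv)
 *	{
 *		struct my_hw *hw = priv;
 *		return my_hw_emit_breadcrumb(hw);
 *	}
 *
 *	static void my_wait_fence(unsigned int fence, void *priv)
 *	{
 *		struct my_hw *hw = priv;
 *		my_hw_wait_breadcrumb(hw, fence);
 *	}
 *
 *	drm_intel_bufmgr_fake_set_fence_callback(bufmgr, my_emit_fence,
 *						 my_wait_fence, hw);
 */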
265 
266 static unsigned int
267 _fence_emit_internal(drm_intel_bufmgr_fake *bufmgr_fake)
268 {
269 	struct drm_i915_irq_emit ie;
270 	int ret, seq = 1;
271 
272 	if (bufmgr_fake->fence_emit != NULL) {
273 		seq = bufmgr_fake->fence_emit(bufmgr_fake->fence_priv);
274 		return seq;
275 	}
276 
277 	ie.irq_seq = &seq;
278 	ret = drmCommandWriteRead(bufmgr_fake->fd, DRM_I915_IRQ_EMIT,
279 				  &ie, sizeof(ie));
280 	if (ret) {
281 		drmMsg("%s: drm_i915_irq_emit: %d\n", __FUNCTION__, ret);
282 		abort();
283 	}
284 
285 	DBG("emit 0x%08x\n", seq);
286 	return seq;
287 }
288 
289 static void
290 _fence_wait_internal(drm_intel_bufmgr_fake *bufmgr_fake, int seq)
291 {
292 	struct drm_i915_irq_wait iw;
293 	int hw_seq, busy_count = 0;
294 	int ret;
295 	int kernel_lied;
296 
297 	if (bufmgr_fake->fence_wait != NULL) {
298 		bufmgr_fake->fence_wait(seq, bufmgr_fake->fence_priv);
299 		clear_fenced(bufmgr_fake, seq);
300 		return;
301 	}
302 
303 	iw.irq_seq = seq;
304 
305 	DBG("wait 0x%08x\n", iw.irq_seq);
306 
307 	/* The kernel IRQ_WAIT implementation is all sorts of broken.
308 	 * 1) It returns 1 to 0x7fffffff instead of using the full 32-bit
309 	 *    unsigned range.
310 	 * 2) It returns 0 if hw_seq >= seq, not seq - hw_seq < 0 on the 32-bit
311 	 *    signed range.
312 	 * 3) It waits if seq < hw_seq, not seq - hw_seq > 0 on the 32-bit
313 	 *    signed range.
314 	 * 4) It returns -EBUSY in 3 seconds even if the hardware is still
315 	 *    successfully chewing through buffers.
316 	 *
317 	 * Assume that in userland we treat sequence numbers as ints, which
318 	 * makes some of the comparisons convenient, since the sequence
319 	 * numbers are all positive signed integers.
320 	 *
321 	 * From this we get several cases we need to handle.  Here's a timeline.
322 	 * 0x2   0x7                                    0x7ffffff8   0x7ffffffd
323 	 *   |    |                                             |    |
324 	 * ------------------------------------------------------------
325 	 *
326 	 * A) Normal wait for hw to catch up
327 	 * hw_seq seq
328 	 *   |    |
329 	 * ------------------------------------------------------------
330 	 * seq - hw_seq = 5.  If we call IRQ_WAIT, it will wait for hw to
331 	 * catch up.
332 	 *
333 	 * B) Normal wait for a sequence number that's already passed.
334 	 * seq    hw_seq
335 	 *   |    |
336 	 * ------------------------------------------------------------
337 	 * seq - hw_seq = -5.  If we call IRQ_WAIT, it returns 0 quickly.
338 	 *
339 	 * C) Hardware has already wrapped around ahead of us
340 	 * hw_seq                                                    seq
341 	 *   |                                                       |
342 	 * ------------------------------------------------------------
343 	 * seq - hw_seq = 0x80000000 - 5.  If we called IRQ_WAIT, it would wait
344 	 * for hw_seq >= seq, which may never occur.  Thus, we want to catch
345 	 * this in userland and return 0.
346 	 *
347 	 * D) We've wrapped around ahead of the hardware.
348 	 * seq                                                      hw_seq
349 	 *   |                                                       |
350 	 * ------------------------------------------------------------
351 	 * seq - hw_seq = -(0x80000000 - 5).  If we called IRQ_WAIT, it would
352 	 * return 0 quickly because hw_seq >= seq, even though the hardware
353 	 * isn't caught up. Thus, we need to catch this early return in
354 	 * userland and bother the kernel until the hardware really does
355 	 * catch up.
356 	 *
357 	 * E) Hardware might wrap after we test in userland.
358 	 *                                                  hw_seq  seq
359 	 *                                                      |    |
360 	 * ------------------------------------------------------------
361 	 * seq - hw_seq = 5.  If we call IRQ_WAIT, it will likely see seq >=
362 	 * hw_seq and wait.  However, suppose hw_seq wraps before we make it
363 	 * into the kernel.  The kernel sees hw_seq >= seq and waits for 3
364 	 * seconds then returns -EBUSY.  This is case C).  We should catch
365 	 * this and then return successfully.
366 	 *
367 	 * F) Hardware might take a long time on a buffer.
368 	 * hw_seq seq
369 	 *   |    |
370 	 * -------------------------------------------------------------------
371 	 * seq - hw_seq = 5.  If we call IRQ_WAIT, if sequence 2 through 5
372 	 * take too long, it will return -EBUSY.  Batchbuffers in the
373 	 * gltestperf demo were seen to take up to 7 seconds.  We should
374 	 * catch early -EBUSY return and keep trying.
375 	 */
376 
377 	do {
378 		/* Keep a copy of last_dispatch so that if the wait -EBUSYs
379 		 * because the hardware didn't catch up in 3 seconds, we can
380 		 * see if it at least made progress and retry.
381 		 */
382 		hw_seq = *bufmgr_fake->last_dispatch;
383 
384 		/* Catch case C */
385 		if (seq - hw_seq > 0x40000000)
386 			return;
387 
388 		ret = drmCommandWrite(bufmgr_fake->fd, DRM_I915_IRQ_WAIT,
389 				      &iw, sizeof(iw));
390 		/* Catch case D */
391 		kernel_lied = (ret == 0) && (seq - *bufmgr_fake->last_dispatch <
392 					     -0x40000000);
393 
394 		/* Catch case E */
395 		if (ret == -EBUSY
396 		    && (seq - *bufmgr_fake->last_dispatch > 0x40000000))
397 			ret = 0;
398 
399 		/* Catch case F: Allow up to 15 seconds chewing on one buffer. */
400 		if ((ret == -EBUSY) && (hw_seq != *bufmgr_fake->last_dispatch))
401 			busy_count = 0;
402 		else
403 			busy_count++;
404 	} while (kernel_lied || ret == -EAGAIN || ret == -EINTR ||
405 		 (ret == -EBUSY && busy_count < 5));
406 
407 	if (ret != 0) {
408 		drmMsg("%s:%d: Error waiting for fence: %s.\n", __FILE__,
409 		       __LINE__, strerror(-ret));
410 		abort();
411 	}
412 	clear_fenced(bufmgr_fake, seq);
413 }
414 
415 static int
416 _fence_test(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
417 {
418 	/* Slight problem with wrap-around:
419 	 */
420 	return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence);
421 }
422 
423 /**
424  * Allocate a memory manager block for the buffer.
425  */
426 static int
427 alloc_block(drm_intel_bo *bo)
428 {
429 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
430 	drm_intel_bufmgr_fake *bufmgr_fake =
431 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
432 	struct block *block = (struct block *)calloc(sizeof *block, 1);
433 	unsigned int align_log2 = ffs(bo_fake->alignment) - 1;
434 	unsigned int sz;
435 
436 	if (!block)
437 		return 1;
438 
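	/* Round the size up to the buffer's alignment, which is assumed to be
	 * a power of two; e.g. a 5000-byte buffer with 4096-byte alignment is
	 * rounded up to 8192 bytes.
	 */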
439 	sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1);
440 
441 	block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0);
442 	if (!block->mem) {
443 		free(block);
444 		return 0;
445 	}
446 
447 	DRMINITLISTHEAD(block);
448 
449 	/* Insert at head or at tail??? */
450 	DRMLISTADDTAIL(block, &bufmgr_fake->lru);
451 
452 	block->virtual = (uint8_t *) bufmgr_fake->virtual +
453 	    block->mem->ofs - bufmgr_fake->low_offset;
454 	block->bo = bo;
455 
456 	bo_fake->block = block;
457 
458 	return 1;
459 }
460 
461 /* Release the card storage associated with buf:
462  */
463 static void
464 free_block(drm_intel_bufmgr_fake *bufmgr_fake, struct block *block,
465 	   int skip_dirty_copy)
466 {
467 	drm_intel_bo_fake *bo_fake;
468 
469 	if (!block)
470 		return;
471 	DBG("free block %p %08x %d %d\n", block, block->mem->ofs,
472 	    block->on_hardware, block->fenced);
473 
474 	bo_fake = (drm_intel_bo_fake *) block->bo;
475 
476 	if (bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE))
477 		skip_dirty_copy = 1;
478 
479 	if (!skip_dirty_copy && (bo_fake->card_dirty == 1)) {
480 		memcpy(bo_fake->backing_store, block->virtual, block->bo->size);
481 		bo_fake->card_dirty = 0;
482 		bo_fake->dirty = 1;
483 	}
484 
485 	if (block->on_hardware) {
486 		block->bo = NULL;
487 	} else if (block->fenced) {
488 		block->bo = NULL;
489 	} else {
490 		DBG("    - free immediately\n");
491 		DRMLISTDEL(block);
492 
493 		mmFreeMem(block->mem);
494 		free(block);
495 	}
496 }
497 
498 static void
499 alloc_backing_store(drm_intel_bo *bo)
500 {
501 	drm_intel_bufmgr_fake *bufmgr_fake =
502 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
503 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
504 	assert(!bo_fake->backing_store);
505 	assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));
506 
507 	bo_fake->backing_store = malloc(bo->size);
508 
509 	DBG("alloc_backing - buf %d %p %lu\n", bo_fake->id,
510 	    bo_fake->backing_store, bo->size);
511 	assert(bo_fake->backing_store);
512 }
513 
514 static void
515 free_backing_store(drm_intel_bo *bo)
516 {
517 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
518 
519 	if (bo_fake->backing_store) {
520 		assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));
521 		free(bo_fake->backing_store);
522 		bo_fake->backing_store = NULL;
523 	}
524 }
525 
526 static void
527 set_dirty(drm_intel_bo *bo)
528 {
529 	drm_intel_bufmgr_fake *bufmgr_fake =
530 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
531 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
532 
533 	if (bo_fake->flags & BM_NO_BACKING_STORE
534 	    && bo_fake->invalidate_cb != NULL)
535 		bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr);
536 
537 	assert(!(bo_fake->flags & BM_PINNED));
538 
539 	DBG("set_dirty - buf %d\n", bo_fake->id);
540 	bo_fake->dirty = 1;
541 }
542 
543 static int
544 evict_lru(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int max_fence)
545 {
546 	struct block *block, *tmp;
547 
548 	DBG("%s\n", __FUNCTION__);
549 
550 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
551 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
552 
553 		if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
554 			continue;
555 
556 		if (block->fence && max_fence && !FENCE_LTE(block->fence,
557 							    max_fence))
558 			return 0;
559 
560 		set_dirty(&bo_fake->bo);
561 		bo_fake->block = NULL;
562 
563 		free_block(bufmgr_fake, block, 0);
564 		return 1;
565 	}
566 
567 	return 0;
568 }
569 
570 static int
571 evict_mru(drm_intel_bufmgr_fake *bufmgr_fake)
572 {
573 	struct block *block, *tmp;
574 
575 	DBG("%s\n", __FUNCTION__);
576 
577 	DRMLISTFOREACHSAFEREVERSE(block, tmp, &bufmgr_fake->lru) {
578 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
579 
580 		if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
581 			continue;
582 
583 		set_dirty(&bo_fake->bo);
584 		bo_fake->block = NULL;
585 
586 		free_block(bufmgr_fake, block, 0);
587 		return 1;
588 	}
589 
590 	return 0;
591 }
592 
593 /**
594  * Removes all objects from the fenced list older than the given fence.
595  */
596 static int
597 clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int fence_cookie)
598 {
599 	struct block *block, *tmp;
600 	int ret = 0;
601 
602 	bufmgr_fake->last_fence = fence_cookie;
603 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->fenced) {
604 		assert(block->fenced);
605 
606 		if (_fence_test(bufmgr_fake, block->fence)) {
607 
608 			block->fenced = 0;
609 
610 			if (!block->bo) {
611 				DBG("delayed free: offset %x sz %x\n",
612 				    block->mem->ofs, block->mem->size);
613 				DRMLISTDEL(block);
614 				mmFreeMem(block->mem);
615 				free(block);
616 			} else {
617 				DBG("return to lru: offset %x sz %x\n",
618 				    block->mem->ofs, block->mem->size);
619 				DRMLISTDEL(block);
620 				DRMLISTADDTAIL(block, &bufmgr_fake->lru);
621 			}
622 
623 			ret = 1;
624 		} else {
625 			/* Blocks are ordered by fence, so if one fails, all
626 			 * from here will fail also:
627 			 */
628 			DBG("fence not passed: offset %x sz %x %d %d \n",
629 			    block->mem->ofs, block->mem->size, block->fence,
630 			    bufmgr_fake->last_fence);
631 			break;
632 		}
633 	}
634 
635 	DBG("%s: %d\n", __FUNCTION__, ret);
636 	return ret;
637 }
638 
639 static void
640 fence_blocks(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
641 {
642 	struct block *block, *tmp;
643 
644 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
645 		DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n",
646 		    block, block->mem->size, block->mem->ofs, block->bo, fence);
647 		block->fence = fence;
648 
649 		block->on_hardware = 0;
650 		block->fenced = 1;
651 
652 		/* Move to tail of pending list here
653 		 */
654 		DRMLISTDEL(block);
655 		DRMLISTADDTAIL(block, &bufmgr_fake->fenced);
656 	}
657 
658 	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
659 }
660 
661 static int
662 evict_and_alloc_block(drm_intel_bo *bo)
663 {
664 	drm_intel_bufmgr_fake *bufmgr_fake =
665 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
666 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
667 
668 	assert(bo_fake->block == NULL);
669 
670 	/* Search for already free memory:
671 	 */
672 	if (alloc_block(bo))
673 		return 1;
674 
675 	/* If we're not thrashing, allow lru eviction to dig deeper into
676 	 * recently used textures.  We'll probably be thrashing soon:
677 	 */
678 	if (!bufmgr_fake->thrashing) {
679 		while (evict_lru(bufmgr_fake, 0))
680 			if (alloc_block(bo))
681 				return 1;
682 	}
683 
684 	/* Keep thrashing counter alive?
685 	 */
686 	if (bufmgr_fake->thrashing)
687 		bufmgr_fake->thrashing = 20;
688 
689 	/* Wait on any already pending fences - here we are waiting for any
690 	 * freed memory that has been submitted to hardware and fenced to
691 	 * become available:
692 	 */
693 	while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
694 		uint32_t fence = bufmgr_fake->fenced.next->fence;
695 		_fence_wait_internal(bufmgr_fake, fence);
696 
697 		if (alloc_block(bo))
698 			return 1;
699 	}
700 
701 	if (!DRMLISTEMPTY(&bufmgr_fake->on_hardware)) {
702 		while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
703 			uint32_t fence = bufmgr_fake->fenced.next->fence;
704 			_fence_wait_internal(bufmgr_fake, fence);
705 		}
706 
707 		if (!bufmgr_fake->thrashing) {
708 			DBG("thrashing\n");
709 		}
710 		bufmgr_fake->thrashing = 20;
711 
712 		if (alloc_block(bo))
713 			return 1;
714 	}
715 
716 	while (evict_mru(bufmgr_fake))
717 		if (alloc_block(bo))
718 			return 1;
719 
720 	DBG("%s 0x%lx bytes failed\n", __FUNCTION__, bo->size);
721 
722 	return 0;
723 }
724 
725 /***********************************************************************
726  * Public functions
727  */
728 
729 /**
730  * Wait for hardware idle by emitting a fence and waiting for it.
731  */
732 static void
733 drm_intel_bufmgr_fake_wait_idle(drm_intel_bufmgr_fake *bufmgr_fake)
734 {
735 	unsigned int cookie;
736 
737 	cookie = _fence_emit_internal(bufmgr_fake);
738 	_fence_wait_internal(bufmgr_fake, cookie);
739 }
740 
741 /**
742  * Wait for rendering to a buffer to complete.
743  *
744  * It is assumed that the batchbuffer which performed the rendering included
745  * the necessary flushing.
746  */
747 static void
748 drm_intel_fake_bo_wait_rendering_locked(drm_intel_bo *bo)
749 {
750 	drm_intel_bufmgr_fake *bufmgr_fake =
751 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
752 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
753 
754 	if (bo_fake->block == NULL || !bo_fake->block->fenced)
755 		return;
756 
757 	_fence_wait_internal(bufmgr_fake, bo_fake->block->fence);
758 }
759 
760 static void
761 drm_intel_fake_bo_wait_rendering(drm_intel_bo *bo)
762 {
763 	drm_intel_bufmgr_fake *bufmgr_fake =
764 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
765 
766 	pthread_mutex_lock(&bufmgr_fake->lock);
767 	drm_intel_fake_bo_wait_rendering_locked(bo);
768 	pthread_mutex_unlock(&bufmgr_fake->lock);
769 }
770 
771 /* Specifically ignore texture memory sharing.
772  *  -- just evict everything
773  *  -- and wait for idle
774  */
775 drm_public void
776 drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr *bufmgr)
777 {
778 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
779 	struct block *block, *tmp;
780 
781 	pthread_mutex_lock(&bufmgr_fake->lock);
782 
783 	bufmgr_fake->need_fence = 1;
784 	bufmgr_fake->fail = 0;
785 
786 	/* Wait for hardware idle.  We don't know where acceleration has been
787 	 * happening, so we'll need to wait anyway before letting anything get
788 	 * put on the card again.
789 	 */
790 	drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
791 
792 	/* Check that we hadn't released the lock without having fenced the last
793 	 * set of buffers.
794 	 */
795 	assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
796 	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
797 
798 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
799 		assert(_fence_test(bufmgr_fake, block->fence));
800 		set_dirty(block->bo);
801 	}
802 
803 	pthread_mutex_unlock(&bufmgr_fake->lock);
804 }
805 
806 static drm_intel_bo *
807 drm_intel_fake_bo_alloc(drm_intel_bufmgr *bufmgr,
808 			const char *name,
809 			unsigned long size,
810 			unsigned int alignment)
811 {
812 	drm_intel_bufmgr_fake *bufmgr_fake;
813 	drm_intel_bo_fake *bo_fake;
814 
815 	bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
816 
817 	assert(size != 0);
818 
819 	bo_fake = calloc(1, sizeof(*bo_fake));
820 	if (!bo_fake)
821 		return NULL;
822 
823 	bo_fake->bo.size = size;
824 	bo_fake->bo.offset = -1;
825 	bo_fake->bo.virtual = NULL;
826 	bo_fake->bo.bufmgr = bufmgr;
827 	bo_fake->refcount = 1;
828 
829 	/* Alignment must be a power of two */
830 	assert((alignment & (alignment - 1)) == 0);
831 	if (alignment == 0)
832 		alignment = 1;
833 	bo_fake->alignment = alignment;
834 	bo_fake->id = ++bufmgr_fake->buf_nr;
835 	bo_fake->name = name;
836 	bo_fake->flags = 0;
837 	bo_fake->is_static = 0;
838 
839 	DBG("drm_bo_alloc: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
840 	    bo_fake->bo.size / 1024);
841 
842 	return &bo_fake->bo;
843 }
844 
845 static drm_intel_bo *
846 drm_intel_fake_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
847 			      const char *name,
848 			      int x, int y, int cpp,
849 			      uint32_t *tiling_mode,
850 			      unsigned long *pitch,
851 			      unsigned long flags)
852 {
853 	unsigned long stride, aligned_y;
854 
855 	/* No runtime tiling support for fake. */
856 	*tiling_mode = I915_TILING_NONE;
857 
858 	/* Align it for being a render target.  Shouldn't need anything else. */
859 	stride = x * cpp;
860 	stride = ROUND_UP_TO(stride, 64);
861 
862 	/* 965 subspan loading alignment */
863 	aligned_y = ALIGN(y, 2);
864 
865 	*pitch = stride;
866 
867 	return drm_intel_fake_bo_alloc(bufmgr, name, stride * aligned_y,
868 				       4096);
869 }
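/* Worked example (illustrative numbers only): a 100x33 surface with cpp = 4
 * gives stride = 400, rounded up to 448; y is aligned up to 34, so the BO is
 * allocated as 448 * 34 = 15232 bytes with 4096-byte alignment, and the
 * caller gets *pitch = 448, *tiling_mode = I915_TILING_NONE.
 */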
870 
871 drm_public drm_intel_bo *
872 drm_intel_bo_fake_alloc_static(drm_intel_bufmgr *bufmgr,
873 			       const char *name,
874 			       unsigned long offset,
875 			       unsigned long size, void *virtual)
876 {
877 	drm_intel_bufmgr_fake *bufmgr_fake;
878 	drm_intel_bo_fake *bo_fake;
879 
880 	bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
881 
882 	assert(size != 0);
883 
884 	bo_fake = calloc(1, sizeof(*bo_fake));
885 	if (!bo_fake)
886 		return NULL;
887 
888 	bo_fake->bo.size = size;
889 	bo_fake->bo.offset = offset;
890 	bo_fake->bo.virtual = virtual;
891 	bo_fake->bo.bufmgr = bufmgr;
892 	bo_fake->refcount = 1;
893 	bo_fake->id = ++bufmgr_fake->buf_nr;
894 	bo_fake->name = name;
895 	bo_fake->flags = BM_PINNED;
896 	bo_fake->is_static = 1;
897 
898 	DBG("drm_bo_alloc_static: (buf %d: %s, %lu kb)\n", bo_fake->id,
899 	    bo_fake->name, bo_fake->bo.size / 1024);
900 
901 	return &bo_fake->bo;
902 }
903 
904 static void
905 drm_intel_fake_bo_reference(drm_intel_bo *bo)
906 {
907 	drm_intel_bufmgr_fake *bufmgr_fake =
908 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
909 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
910 
911 	pthread_mutex_lock(&bufmgr_fake->lock);
912 	bo_fake->refcount++;
913 	pthread_mutex_unlock(&bufmgr_fake->lock);
914 }
915 
916 static void
917 drm_intel_fake_bo_reference_locked(drm_intel_bo *bo)
918 {
919 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
920 
921 	bo_fake->refcount++;
922 }
923 
924 static void
925 drm_intel_fake_bo_unreference_locked(drm_intel_bo *bo)
926 {
927 	drm_intel_bufmgr_fake *bufmgr_fake =
928 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
929 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
930 	int i;
931 
932 	if (--bo_fake->refcount == 0) {
933 		assert(bo_fake->map_count == 0);
934 		/* No remaining references, so free it */
935 		if (bo_fake->block)
936 			free_block(bufmgr_fake, bo_fake->block, 1);
937 		free_backing_store(bo);
938 
939 		for (i = 0; i < bo_fake->nr_relocs; i++)
940 			drm_intel_fake_bo_unreference_locked(bo_fake->relocs[i].
941 							     target_buf);
942 
943 		DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id,
944 		    bo_fake->name);
945 
946 		free(bo_fake->relocs);
947 		free(bo);
948 	}
949 }
950 
951 static void
952 drm_intel_fake_bo_unreference(drm_intel_bo *bo)
953 {
954 	drm_intel_bufmgr_fake *bufmgr_fake =
955 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
956 
957 	pthread_mutex_lock(&bufmgr_fake->lock);
958 	drm_intel_fake_bo_unreference_locked(bo);
959 	pthread_mutex_unlock(&bufmgr_fake->lock);
960 }
961 
962 /**
963  * Set the buffer as not requiring backing store, and instead get the callback
964  * invoked whenever it would be set dirty.
965  */
966 drm_public void
967 drm_intel_bo_fake_disable_backing_store(drm_intel_bo *bo,
968 					void (*invalidate_cb) (drm_intel_bo *bo,
969 							       void *ptr),
970 					void *ptr)
971 {
972 	drm_intel_bufmgr_fake *bufmgr_fake =
973 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
974 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
975 
976 	pthread_mutex_lock(&bufmgr_fake->lock);
977 
978 	if (bo_fake->backing_store)
979 		free_backing_store(bo);
980 
981 	bo_fake->flags |= BM_NO_BACKING_STORE;
982 
983 	DBG("disable_backing_store set buf %d dirty\n", bo_fake->id);
984 	bo_fake->dirty = 1;
985 	bo_fake->invalidate_cb = invalidate_cb;
986 	bo_fake->invalidate_ptr = ptr;
987 
988 	/* Note that it is invalid right from the start.  Also note
989 	 * invalidate_cb is called with the bufmgr locked, so cannot
990 	 * itself make bufmgr calls.
991 	 */
992 	if (invalidate_cb != NULL)
993 		invalidate_cb(bo, ptr);
994 
995 	pthread_mutex_unlock(&bufmgr_fake->lock);
996 }
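/* A hypothetical use, sketched only to show the calling convention: a driver
 * that can regenerate a buffer's contents on demand (e.g. a scratch buffer)
 * can drop the malloc'd backing store and simply be told when the contents
 * were lost.  scratch_invalidate and struct my_scratch are made up for the
 * example:
 *
 *	static void scratch_invalidate(drm_intel_bo *bo, void *ptr)
 *	{
 *		struct my_scratch *s = ptr;
 *
 *		s->needs_refill = 1;
 *	}
 *
 *	drm_intel_bo_fake_disable_backing_store(bo, scratch_invalidate, s);
 */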
997 
998 /**
999  * Map a buffer into bo->virtual, allocating either card memory space (If
1000  * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary.
1001  */
1002 static int
1003 drm_intel_fake_bo_map_locked(drm_intel_bo *bo, int write_enable)
1004 {
1005 	drm_intel_bufmgr_fake *bufmgr_fake =
1006 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1007 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1008 
1009 	/* Static buffers are always mapped. */
1010 	if (bo_fake->is_static) {
1011 		if (bo_fake->card_dirty) {
1012 			drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1013 			bo_fake->card_dirty = 0;
1014 		}
1015 		return 0;
1016 	}
1017 
1018 	/* Allow recursive mapping.  Mesa may recursively map buffers with
1019 	 * nested display loops, and it is used internally in bufmgr_fake
1020 	 * for relocation.
1021 	 */
1022 	if (bo_fake->map_count++ != 0)
1023 		return 0;
1024 
1025 	{
1026 		DBG("drm_bo_map: (buf %d: %s, %lu kb)\n", bo_fake->id,
1027 		    bo_fake->name, bo_fake->bo.size / 1024);
1028 
1029 		if (bo->virtual != NULL) {
1030 			drmMsg("%s: already mapped\n", __FUNCTION__);
1031 			abort();
1032 		} else if (bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)) {
1033 
1034 			if (!bo_fake->block && !evict_and_alloc_block(bo)) {
1035 				DBG("%s: alloc failed\n", __FUNCTION__);
1036 				bufmgr_fake->fail = 1;
1037 				return 1;
1038 			} else {
1039 				assert(bo_fake->block);
1040 				bo_fake->dirty = 0;
1041 
1042 				if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) &&
1043 				    bo_fake->block->fenced) {
1044 					drm_intel_fake_bo_wait_rendering_locked
1045 					    (bo);
1046 				}
1047 
1048 				bo->virtual = bo_fake->block->virtual;
1049 			}
1050 		} else {
1051 			if (write_enable)
1052 				set_dirty(bo);
1053 
1054 			if (bo_fake->backing_store == 0)
1055 				alloc_backing_store(bo);
1056 
1057 			if ((bo_fake->card_dirty == 1) && bo_fake->block) {
1058 				if (bo_fake->block->fenced)
1059 					drm_intel_fake_bo_wait_rendering_locked
1060 					    (bo);
1061 
1062 				memcpy(bo_fake->backing_store,
1063 				       bo_fake->block->virtual,
1064 				       bo_fake->block->bo->size);
1065 				bo_fake->card_dirty = 0;
1066 			}
1067 
1068 			bo->virtual = bo_fake->backing_store;
1069 		}
1070 	}
1071 
1072 	return 0;
1073 }
1074 
1075 static int
1076 drm_intel_fake_bo_map(drm_intel_bo *bo, int write_enable)
1077 {
1078 	drm_intel_bufmgr_fake *bufmgr_fake =
1079 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1080 	int ret;
1081 
1082 	pthread_mutex_lock(&bufmgr_fake->lock);
1083 	ret = drm_intel_fake_bo_map_locked(bo, write_enable);
1084 	pthread_mutex_unlock(&bufmgr_fake->lock);
1085 
1086 	return ret;
1087 }
1088 
1089 static int
1090 drm_intel_fake_bo_unmap_locked(drm_intel_bo *bo)
1091 {
1092 	drm_intel_bufmgr_fake *bufmgr_fake =
1093 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1094 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1095 
1096 	/* Static buffers are always mapped. */
1097 	if (bo_fake->is_static)
1098 		return 0;
1099 
1100 	assert(bo_fake->map_count != 0);
1101 	if (--bo_fake->map_count != 0)
1102 		return 0;
1103 
1104 	DBG("drm_bo_unmap: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
1105 	    bo_fake->bo.size / 1024);
1106 
1107 	bo->virtual = NULL;
1108 
1109 	return 0;
1110 }
1111 
1112 static int drm_intel_fake_bo_unmap(drm_intel_bo *bo)
1113 {
1114 	drm_intel_bufmgr_fake *bufmgr_fake =
1115 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1116 	int ret;
1117 
1118 	pthread_mutex_lock(&bufmgr_fake->lock);
1119 	ret = drm_intel_fake_bo_unmap_locked(bo);
1120 	pthread_mutex_unlock(&bufmgr_fake->lock);
1121 
1122 	return ret;
1123 }
1124 
1125 static int
1126 drm_intel_fake_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1127 			  unsigned long size, const void *data)
1128 {
1129 	int ret;
1130 
1131 	if (size == 0 || data == NULL)
1132 		return 0;
1133 
1134 	ret = drm_intel_bo_map(bo, 1);
1135 	if (ret)
1136 		return ret;
1137 	memcpy((unsigned char *)bo->virtual + offset, data, size);
1138 	drm_intel_bo_unmap(bo);
1139 	return 0;
1140 }
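/* From the client's point of view the usual upload path is just the generic
 * libdrm entry points (a sketch; "data" and "data_size" are placeholders):
 *
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "vertices", 4096, 4096);
 *
 *	drm_intel_bo_map(bo, 1);
 *	memcpy(bo->virtual, data, data_size);
 *	drm_intel_bo_unmap(bo);
 *
 * or, equivalently for small writes:
 *
 *	drm_intel_bo_subdata(bo, 0, data_size, data);
 */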
1141 
1142 static void
1143 drm_intel_fake_kick_all_locked(drm_intel_bufmgr_fake *bufmgr_fake)
1144 {
1145 	struct block *block, *tmp;
1146 
1147 	bufmgr_fake->performed_rendering = 0;
1148 	/* For every BO that is currently on the hardware, kick it off:
1149 	   release its block and mark it as needing re-validation. */
1150 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
1151 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
1152 
1153 		block->on_hardware = 0;
1154 		free_block(bufmgr_fake, block, 0);
1155 		bo_fake->block = NULL;
1156 		bo_fake->validated = 0;
1157 		if (!(bo_fake->flags & BM_NO_BACKING_STORE))
1158 			bo_fake->dirty = 1;
1159 	}
1160 
1161 }
1162 
1163 static int
1164 drm_intel_fake_bo_validate(drm_intel_bo *bo)
1165 {
1166 	drm_intel_bufmgr_fake *bufmgr_fake;
1167 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1168 
1169 	bufmgr_fake = (drm_intel_bufmgr_fake *) bo->bufmgr;
1170 
1171 	DBG("drm_bo_validate: (buf %d: %s, %lu kb)\n", bo_fake->id,
1172 	    bo_fake->name, bo_fake->bo.size / 1024);
1173 
1174 	/* Sanity check: Buffers should be unmapped before being validated.
1175 	 * This is not so much of a problem for bufmgr_fake, but TTM refuses,
1176 	 * and the problem is harder to debug there.
1177 	 */
1178 	assert(bo_fake->map_count == 0);
1179 
1180 	if (bo_fake->is_static) {
1181 		/* Add it to the needs-fence list */
1182 		bufmgr_fake->need_fence = 1;
1183 		return 0;
1184 	}
1185 
1186 	/* Allocate the card memory */
1187 	if (!bo_fake->block && !evict_and_alloc_block(bo)) {
1188 		bufmgr_fake->fail = 1;
1189 		DBG("Failed to validate buf %d:%s\n", bo_fake->id,
1190 		    bo_fake->name);
1191 		return -1;
1192 	}
1193 
1194 	assert(bo_fake->block);
1195 	assert(bo_fake->block->bo == &bo_fake->bo);
1196 
1197 	bo->offset = bo_fake->block->mem->ofs;
1198 
1199 	/* Upload the buffer contents if necessary */
1200 	if (bo_fake->dirty) {
1201 		DBG("Upload dirty buf %d:%s, sz %lu offset 0x%x\n", bo_fake->id,
1202 		    bo_fake->name, bo->size, bo_fake->block->mem->ofs);
1203 
1204 		assert(!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)));
1205 
1206 		/* Actually, should be able to just wait for a fence on the
1207 		 * memory, which we would be tracking when we free it.  Waiting
1208 		 * for idle is a sufficiently large hammer for now.
1209 		 */
1210 		drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1211 
1212 		/* We may never have mapped this BO, so it might not have any
1213 		 * backing store.  That should be rare, but zero the card
1214 		 * memory in that case anyway. */
1215 		if (bo_fake->backing_store)
1216 			memcpy(bo_fake->block->virtual, bo_fake->backing_store,
1217 			       bo->size);
1218 		else
1219 			memset(bo_fake->block->virtual, 0, bo->size);
1220 
1221 		bo_fake->dirty = 0;
1222 	}
1223 
1224 	bo_fake->block->fenced = 0;
1225 	bo_fake->block->on_hardware = 1;
1226 	DRMLISTDEL(bo_fake->block);
1227 	DRMLISTADDTAIL(bo_fake->block, &bufmgr_fake->on_hardware);
1228 
1229 	bo_fake->validated = 1;
1230 	bufmgr_fake->need_fence = 1;
1231 
1232 	return 0;
1233 }
1234 
1235 static void
1236 drm_intel_fake_fence_validated(drm_intel_bufmgr *bufmgr)
1237 {
1238 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1239 	unsigned int cookie;
1240 
1241 	cookie = _fence_emit_internal(bufmgr_fake);
1242 	fence_blocks(bufmgr_fake, cookie);
1243 
1244 	DBG("drm_fence_validated: 0x%08x cookie\n", cookie);
1245 }
1246 
1247 static void
1248 drm_intel_fake_destroy(drm_intel_bufmgr *bufmgr)
1249 {
1250 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1251 
1252 	pthread_mutex_destroy(&bufmgr_fake->lock);
1253 	mmDestroy(bufmgr_fake->heap);
1254 	free(bufmgr);
1255 }
1256 
1257 static int
1258 drm_intel_fake_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1259 			  drm_intel_bo *target_bo, uint32_t target_offset,
1260 			  uint32_t read_domains, uint32_t write_domain)
1261 {
1262 	drm_intel_bufmgr_fake *bufmgr_fake =
1263 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1264 	struct fake_buffer_reloc *r;
1265 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1266 	drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *) target_bo;
1267 	int i;
1268 
1269 	pthread_mutex_lock(&bufmgr_fake->lock);
1270 
1271 	assert(bo);
1272 	assert(target_bo);
1273 
1274 	if (bo_fake->relocs == NULL) {
1275 		bo_fake->relocs =
1276 		    malloc(sizeof(struct fake_buffer_reloc) * MAX_RELOCS);
1277 	}
1278 
1279 	r = &bo_fake->relocs[bo_fake->nr_relocs++];
1280 
1281 	assert(bo_fake->nr_relocs <= MAX_RELOCS);
1282 
1283 	drm_intel_fake_bo_reference_locked(target_bo);
1284 
1285 	if (!target_fake->is_static) {
1286 		bo_fake->child_size +=
1287 		    ALIGN(target_bo->size, target_fake->alignment);
1288 		bo_fake->child_size += target_fake->child_size;
1289 	}
1290 	r->target_buf = target_bo;
1291 	r->offset = offset;
1292 	r->last_target_offset = target_bo->offset;
1293 	r->delta = target_offset;
1294 	r->read_domains = read_domains;
1295 	r->write_domain = write_domain;
1296 
1297 	if (bufmgr_fake->debug) {
1298 		/* Check that a conflicting relocation hasn't already been
1299 		 * emitted.
1300 		 */
1301 		for (i = 0; i < bo_fake->nr_relocs - 1; i++) {
1302 			struct fake_buffer_reloc *r2 = &bo_fake->relocs[i];
1303 
1304 			assert(r->offset != r2->offset);
1305 		}
1306 	}
1307 
1308 	pthread_mutex_unlock(&bufmgr_fake->lock);
1309 
1310 	return 0;
1311 }
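/* Sketch of how a batchbuffer references another buffer through the public
 * wrapper; batch_bo, vertex_bo and reloc_offset are placeholders.  The dword
 * at byte reloc_offset inside batch_bo will be patched to
 * vertex_bo->offset + 0 when the buffers are validated:
 *
 *	drm_intel_bo_emit_reloc(batch_bo, reloc_offset,
 *				vertex_bo, 0,
 *				I915_GEM_DOMAIN_VERTEX, 0);
 */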
1312 
1313 /**
1314  * Incorporates the validation flags associated with each relocation into
1315  * the combined validation flags for the buffer on this batchbuffer submission.
1316  */
1317 static void
1318 drm_intel_fake_calculate_domains(drm_intel_bo *bo)
1319 {
1320 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1321 	int i;
1322 
1323 	for (i = 0; i < bo_fake->nr_relocs; i++) {
1324 		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1325 		drm_intel_bo_fake *target_fake =
1326 		    (drm_intel_bo_fake *) r->target_buf;
1327 
1328 		/* Do the same for the tree of buffers we depend on */
1329 		drm_intel_fake_calculate_domains(r->target_buf);
1330 
1331 		target_fake->read_domains |= r->read_domains;
1332 		target_fake->write_domain |= r->write_domain;
1333 	}
1334 }
1335 
1336 static int
1337 drm_intel_fake_reloc_and_validate_buffer(drm_intel_bo *bo)
1338 {
1339 	drm_intel_bufmgr_fake *bufmgr_fake =
1340 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1341 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1342 	int i, ret;
1343 
1344 	assert(bo_fake->map_count == 0);
1345 
1346 	for (i = 0; i < bo_fake->nr_relocs; i++) {
1347 		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1348 		drm_intel_bo_fake *target_fake =
1349 		    (drm_intel_bo_fake *) r->target_buf;
1350 		uint32_t reloc_data;
1351 
1352 		/* Validate the target buffer if that hasn't been done. */
1353 		if (!target_fake->validated) {
1354 			ret =
1355 			    drm_intel_fake_reloc_and_validate_buffer(r->target_buf);
1356 			if (ret != 0) {
1357 				if (bo->virtual != NULL)
1358 					drm_intel_fake_bo_unmap_locked(bo);
1359 				return ret;
1360 			}
1361 		}
1362 
1363 		/* Calculate the value of the relocation entry. */
1364 		if (r->target_buf->offset != r->last_target_offset) {
1365 			reloc_data = r->target_buf->offset + r->delta;
1366 
1367 			if (bo->virtual == NULL)
1368 				drm_intel_fake_bo_map_locked(bo, 1);
1369 
1370 			*(uint32_t *) ((uint8_t *) bo->virtual + r->offset) =
1371 			    reloc_data;
1372 
1373 			r->last_target_offset = r->target_buf->offset;
1374 		}
1375 	}
1376 
1377 	if (bo->virtual != NULL)
1378 		drm_intel_fake_bo_unmap_locked(bo);
1379 
1380 	if (bo_fake->write_domain != 0) {
1381 		if (!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED))) {
1382 			if (bo_fake->backing_store == 0)
1383 				alloc_backing_store(bo);
1384 		}
1385 		bo_fake->card_dirty = 1;
1386 		bufmgr_fake->performed_rendering = 1;
1387 	}
1388 
1389 	return drm_intel_fake_bo_validate(bo);
1390 }
1391 
1392 static void
1393 drm_intel_bo_fake_post_submit(drm_intel_bo *bo)
1394 {
1395 	drm_intel_bufmgr_fake *bufmgr_fake =
1396 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1397 	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1398 	int i;
1399 
1400 	for (i = 0; i < bo_fake->nr_relocs; i++) {
1401 		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1402 		drm_intel_bo_fake *target_fake =
1403 		    (drm_intel_bo_fake *) r->target_buf;
1404 
1405 		if (target_fake->validated)
1406 			drm_intel_bo_fake_post_submit(r->target_buf);
1407 
1408 		DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n",
1409 		    bo_fake->name, (uint32_t) bo->offset, r->offset,
1410 		    target_fake->name, (uint32_t) r->target_buf->offset,
1411 		    r->delta);
1412 	}
1413 
1414 	assert(bo_fake->map_count == 0);
1415 	bo_fake->validated = 0;
1416 	bo_fake->read_domains = 0;
1417 	bo_fake->write_domain = 0;
1418 }
1419 
1420 drm_public void
1421 drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr *bufmgr,
1422 					     int (*exec) (drm_intel_bo *bo,
1423 							  unsigned int used,
1424 							  void *priv),
1425 					     void *priv)
1426 {
1427 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1428 
1429 	bufmgr_fake->exec = exec;
1430 	bufmgr_fake->exec_priv = priv;
1431 }
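/* As with the fence callbacks, a driver may route batch submission through
 * its own path.  my_exec, my_hw_submit and my_hw are hypothetical:
 *
 *	static int my_exec(drm_intel_bo *bo, unsigned int used, void *priv)
 *	{
 *		return my_hw_submit(priv, bo->offset, used);
 *	}
 *
 *	drm_intel_bufmgr_fake_set_exec_callback(bufmgr, my_exec, my_hw);
 */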
1432 
1433 static int
1434 drm_intel_fake_bo_exec(drm_intel_bo *bo, int used,
1435 		       drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
1436 {
1437 	drm_intel_bufmgr_fake *bufmgr_fake =
1438 	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1439 	drm_intel_bo_fake *batch_fake = (drm_intel_bo_fake *) bo;
1440 	struct drm_i915_batchbuffer batch;
1441 	int ret;
1442 	int retry_count = 0;
1443 
1444 	pthread_mutex_lock(&bufmgr_fake->lock);
1445 
1446 	bufmgr_fake->performed_rendering = 0;
1447 
1448 	drm_intel_fake_calculate_domains(bo);
1449 
1450 	batch_fake->read_domains = I915_GEM_DOMAIN_COMMAND;
1451 
1452 	/* we've run out of RAM, so blow the whole lot away and retry */
1453 restart:
1454 	ret = drm_intel_fake_reloc_and_validate_buffer(bo);
1455 	if (bufmgr_fake->fail == 1) {
1456 		if (retry_count == 0) {
1457 			retry_count++;
1458 			drm_intel_fake_kick_all_locked(bufmgr_fake);
1459 			bufmgr_fake->fail = 0;
1460 			goto restart;
1461 		} else		/* dump out the memory here */
1462 			mmDumpMemInfo(bufmgr_fake->heap);
1463 	}
1464 
1465 	assert(ret == 0);
1466 
1467 	if (bufmgr_fake->exec != NULL) {
1468 		int ret = bufmgr_fake->exec(bo, used, bufmgr_fake->exec_priv);
1469 		if (ret != 0) {
1470 			pthread_mutex_unlock(&bufmgr_fake->lock);
1471 			return ret;
1472 		}
1473 	} else {
1474 		batch.start = bo->offset;
1475 		batch.used = used;
1476 		batch.cliprects = cliprects;
1477 		batch.num_cliprects = num_cliprects;
1478 		batch.DR1 = 0;
1479 		batch.DR4 = DR4;
1480 
1481 		if (drmCommandWrite
1482 		    (bufmgr_fake->fd, DRM_I915_BATCHBUFFER, &batch,
1483 		     sizeof(batch))) {
1484 			drmMsg("DRM_I915_BATCHBUFFER: %d\n", -errno);
1485 			pthread_mutex_unlock(&bufmgr_fake->lock);
1486 			return -errno;
1487 		}
1488 	}
1489 
1490 	drm_intel_fake_fence_validated(bo->bufmgr);
1491 
1492 	drm_intel_bo_fake_post_submit(bo);
1493 
1494 	pthread_mutex_unlock(&bufmgr_fake->lock);
1495 
1496 	return 0;
1497 }
1498 
1499 /**
1500  * Return an error if the list of BOs will exceed the aperture size.
1501  *
1502  * This is a rough guess and likely to fail, as during the validate sequence we
1503  * may place a buffer in an inopportune spot early on and then fail to fit
1504  * a set smaller than the aperture.
1505  */
1506 static int
1507 drm_intel_fake_check_aperture_space(drm_intel_bo ** bo_array, int count)
1508 {
1509 	drm_intel_bufmgr_fake *bufmgr_fake =
1510 	    (drm_intel_bufmgr_fake *) bo_array[0]->bufmgr;
1511 	unsigned int sz = 0;
1512 	int i;
1513 
1514 	for (i = 0; i < count; i++) {
1515 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo_array[i];
1516 
1517 		if (bo_fake == NULL)
1518 			continue;
1519 
1520 		if (!bo_fake->is_static)
1521 			sz += ALIGN(bo_array[i]->size, bo_fake->alignment);
1522 		sz += bo_fake->child_size;
1523 	}
1524 
1525 	if (sz > bufmgr_fake->size) {
1526 		DBG("check_space: overflowed bufmgr size, %ukb vs %lukb\n",
1527 		    sz / 1024, bufmgr_fake->size / 1024);
1528 		return -1;
1529 	}
1530 
1531 	DBG("drm_check_space: sz %ukb vs bufmgr %lukb\n", sz / 1024,
1532 	    bufmgr_fake->size / 1024);
1533 	return 0;
1534 }
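/* Example (sketch): before finalizing a batch, a driver can ask whether the
 * buffers it is about to reference could plausibly fit; batch_bo, bo1 and
 * bo2 are placeholders:
 *
 *	drm_intel_bo *check[] = { batch_bo, bo1, bo2 };
 *
 *	if (drm_intel_bufmgr_check_aperture_space(check, 3) != 0) {
 *		... flush the current batch and retry with fewer buffers ...
 *	}
 */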
1535 
1536 /**
1537  * Evicts all buffers, waiting for fences to pass and copying contents out
1538  * as necessary.
1539  *
1540  * Used by the X Server on LeaveVT, when the card memory is no longer our
1541  * own.
1542  */
1543 drm_public void
1544 drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr *bufmgr)
1545 {
1546 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1547 	struct block *block, *tmp;
1548 
1549 	pthread_mutex_lock(&bufmgr_fake->lock);
1550 
1551 	bufmgr_fake->need_fence = 1;
1552 	bufmgr_fake->fail = 0;
1553 
1554 	/* Wait for hardware idle.  We don't know where acceleration has been
1555 	 * happening, so we'll need to wait anyway before letting anything get
1556 	 * put on the card again.
1557 	 */
1558 	drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1559 
1560 	/* Check that we hadn't released the lock without having fenced the last
1561 	 * set of buffers.
1562 	 */
1563 	assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
1564 	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
1565 
1566 	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
1567 		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
1568 		/* Releases the memory, and memcpys dirty contents out if
1569 		 * necessary.
1570 		 */
1571 		free_block(bufmgr_fake, block, 0);
1572 		bo_fake->block = NULL;
1573 	}
1574 
1575 	pthread_mutex_unlock(&bufmgr_fake->lock);
1576 }
1577 
1578 drm_public void
1579 drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr *bufmgr,
1580 					volatile unsigned int
1581 					*last_dispatch)
1582 {
1583 	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1584 
1585 	bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
1586 }
1587 
1588 drm_public drm_intel_bufmgr *
1589 drm_intel_bufmgr_fake_init(int fd, unsigned long low_offset,
1590 			   void *low_virtual, unsigned long size,
1591 			   volatile unsigned int *last_dispatch)
1592 {
1593 	drm_intel_bufmgr_fake *bufmgr_fake;
1594 
1595 	bufmgr_fake = calloc(1, sizeof(*bufmgr_fake));
1596 
1597 	if (pthread_mutex_init(&bufmgr_fake->lock, NULL) != 0) {
1598 		free(bufmgr_fake);
1599 		return NULL;
1600 	}
1601 
1602 	/* Initialize allocator */
1603 	DRMINITLISTHEAD(&bufmgr_fake->fenced);
1604 	DRMINITLISTHEAD(&bufmgr_fake->on_hardware);
1605 	DRMINITLISTHEAD(&bufmgr_fake->lru);
1606 
1607 	bufmgr_fake->low_offset = low_offset;
1608 	bufmgr_fake->virtual = low_virtual;
1609 	bufmgr_fake->size = size;
1610 	bufmgr_fake->heap = mmInit(low_offset, size);
1611 
1612 	/* Hook in methods */
1613 	bufmgr_fake->bufmgr.bo_alloc = drm_intel_fake_bo_alloc;
1614 	bufmgr_fake->bufmgr.bo_alloc_for_render = drm_intel_fake_bo_alloc;
1615 	bufmgr_fake->bufmgr.bo_alloc_tiled = drm_intel_fake_bo_alloc_tiled;
1616 	bufmgr_fake->bufmgr.bo_reference = drm_intel_fake_bo_reference;
1617 	bufmgr_fake->bufmgr.bo_unreference = drm_intel_fake_bo_unreference;
1618 	bufmgr_fake->bufmgr.bo_map = drm_intel_fake_bo_map;
1619 	bufmgr_fake->bufmgr.bo_unmap = drm_intel_fake_bo_unmap;
1620 	bufmgr_fake->bufmgr.bo_subdata = drm_intel_fake_bo_subdata;
1621 	bufmgr_fake->bufmgr.bo_wait_rendering =
1622 	    drm_intel_fake_bo_wait_rendering;
1623 	bufmgr_fake->bufmgr.bo_emit_reloc = drm_intel_fake_emit_reloc;
1624 	bufmgr_fake->bufmgr.destroy = drm_intel_fake_destroy;
1625 	bufmgr_fake->bufmgr.bo_exec = drm_intel_fake_bo_exec;
1626 	bufmgr_fake->bufmgr.check_aperture_space =
1627 	    drm_intel_fake_check_aperture_space;
1628 	bufmgr_fake->bufmgr.debug = 0;
1629 
1630 	bufmgr_fake->fd = fd;
1631 	bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
1632 
1633 	return &bufmgr_fake->bufmgr;
1634 }
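/* A rough initialization sketch.  The aperture offset, size, mapping and the
 * last_dispatch pointer are driver-specific; the names below are only
 * placeholders:
 *
 *	drm_intel_bufmgr *bufmgr;
 *
 *	bufmgr = drm_intel_bufmgr_fake_init(fd, aperture_offset,
 *					    aperture_virtual, aperture_size,
 *					    sarea_last_dispatch);
 *	if (bufmgr == NULL)
 *		return;
 *
 * Optionally, hook driver fencing and execution afterwards:
 *
 *	drm_intel_bufmgr_fake_set_fence_callback(bufmgr, emit, wait, priv);
 *	drm_intel_bufmgr_fake_set_exec_callback(bufmgr, exec, priv);
 */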
1635