1 /**************************************************************************
2 *
3 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /* Originally a fake version of the buffer manager so that we can
29 * prototype the changes in a driver fairly quickly, has been fleshed
30 * out to a fully functional interim solution.
31 *
32 * Basically wraps the old style memory management in the new
33 * programming interface, but is more expressive and avoids many of
34 * the bugs in the old texture manager.
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40
41 #include <stdlib.h>
42 #include <string.h>
43 #include <assert.h>
44 #include <errno.h>
45 #include <xf86drm.h>
46 #include <pthread.h>
47 #include "intel_bufmgr.h"
48 #include "intel_bufmgr_priv.h"
49 #include "drm.h"
50 #include "i915_drm.h"
51 #include "mm.h"
52 #include "libdrm.h"
53 #include "libdrm_lists.h"
54
/*
 * __FUNCTION__ is a gcc spelling.  When the compiler is not gcc and has not
 * already provided the name, map it onto the standard C99 __func__.
 */
#if !defined(__GNUC__) && !defined(__FUNCTION__)
#define __FUNCTION__ __func__
#endif
59
/*
 * Debug trace helper: passes its printf-style arguments to drmMsg() when the
 * debug field of the embedded drm_intel_bufmgr is non-zero.
 *
 * The expansion refers to a variable literally named `bufmgr_fake`, so a
 * drm_intel_bufmgr_fake pointer with that name must be in scope wherever the
 * macro is used.
 */
#define DBG(...)					\
	do {						\
		if (bufmgr_fake->bufmgr.debug)		\
			drmMsg(__VA_ARGS__);		\
	} while (0)
64
/*
 * Buffer-manager-private bits kept in drm_intel_bo_fake::flags.
 */
/* Buffer has no system-memory shadow copy. */
#define BM_NO_BACKING_STORE	0x1
/* Eviction skips such buffers; mapping skips the fence wait for them. */
#define BM_NO_FENCE_SUBDATA	0x2
/* Buffer lives at a fixed location (set for statically allocated buffers). */
#define BM_PINNED		0x4
70
/*
 * struct block (declared further down) wraps mm.c's mem_block and adds the
 * knowledge that memory may only be released once its fence has expired.
 * That requirement comes from using memcpy for uploads; an upload processed
 * through the command queue would not have to care about fences.
 */

/* NOTE(review): presumably the cap on relocation entries per buffer; its
 * users are not visible in this part of the file. */
#define MAX_RELOCS 4096
78
79 struct fake_buffer_reloc {
80 /** Buffer object that the relocation points at. */
81 drm_intel_bo *target_buf;
82 /** Offset of the relocation entry within reloc_buf. */
83 uint32_t offset;
84 /**
85 * Cached value of the offset when we last performed this relocation.
86 */
87 uint32_t last_target_offset;
88 /** Value added to target_buf's offset to get the relocation entry. */
89 uint32_t delta;
90 /** Cache domains the target buffer is read into. */
91 uint32_t read_domains;
92 /** Cache domain the target buffer will have dirty cachelines in. */
93 uint32_t write_domain;
94 };
95
96 struct block {
97 struct block *next, *prev;
98 struct mem_block *mem; /* BM_MEM_AGP */
99
100 /**
101 * Marks that the block is currently in the aperture and has yet to be
102 * fenced.
103 */
104 unsigned on_hardware:1;
105 /**
106 * Marks that the block is currently fenced (being used by rendering)
107 * and can't be freed until @fence is passed.
108 */
109 unsigned fenced:1;
110
111 /** Fence cookie for the block. */
112 unsigned fence; /* Split to read_fence, write_fence */
113
114 drm_intel_bo *bo;
115 void *virtual;
116 };
117
118 typedef struct _bufmgr_fake {
119 drm_intel_bufmgr bufmgr;
120
121 pthread_mutex_t lock;
122
123 unsigned long low_offset;
124 unsigned long size;
125 void *virtual;
126
127 struct mem_block *heap;
128
129 unsigned buf_nr; /* for generating ids */
130
131 /**
132 * List of blocks which are currently in the GART but haven't been
133 * fenced yet.
134 */
135 struct block on_hardware;
136 /**
137 * List of blocks which are in the GART and have an active fence on
138 * them.
139 */
140 struct block fenced;
141 /**
142 * List of blocks which have an expired fence and are ready to be
143 * evicted.
144 */
145 struct block lru;
146
147 unsigned int last_fence;
148
149 unsigned fail:1;
150 unsigned need_fence:1;
151 int thrashing;
152
153 /**
154 * Driver callback to emit a fence, returning the cookie.
155 *
156 * This allows the driver to hook in a replacement for the DRM usage in
157 * bufmgr_fake.
158 *
159 * Currently, this also requires that a write flush be emitted before
160 * emitting the fence, but this should change.
161 */
162 unsigned int (*fence_emit) (void *private);
163 /** Driver callback to wait for a fence cookie to have passed. */
164 void (*fence_wait) (unsigned int fence, void *private);
165 void *fence_priv;
166
167 /**
168 * Driver callback to execute a buffer.
169 *
170 * This allows the driver to hook in a replacement for the DRM usage in
171 * bufmgr_fake.
172 */
173 int (*exec) (drm_intel_bo *bo, unsigned int used, void *priv);
174 void *exec_priv;
175
176 /** Driver-supplied argument to driver callbacks */
177 void *driver_priv;
178 /**
179 * Pointer to kernel-updated sarea data for the last completed user irq
180 */
181 volatile int *last_dispatch;
182
183 int fd;
184
185 int debug;
186
187 int performed_rendering;
188 } drm_intel_bufmgr_fake;
189
190 typedef struct _drm_intel_bo_fake {
191 drm_intel_bo bo;
192
193 unsigned id; /* debug only */
194 const char *name;
195
196 unsigned dirty:1;
197 /**
198 * has the card written to this buffer - we make need to copy it back
199 */
200 unsigned card_dirty:1;
201 unsigned int refcount;
202 /* Flags may consist of any of the DRM_BO flags, plus
203 * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the
204 * first two driver private flags.
205 */
206 uint64_t flags;
207 /** Cache domains the target buffer is read into. */
208 uint32_t read_domains;
209 /** Cache domain the target buffer will have dirty cachelines in. */
210 uint32_t write_domain;
211
212 unsigned int alignment;
213 int is_static, validated;
214 unsigned int map_count;
215
216 /** relocation list */
217 struct fake_buffer_reloc *relocs;
218 int nr_relocs;
219 /**
220 * Total size of the target_bos of this buffer.
221 *
222 * Used for estimation in check_aperture.
223 */
224 unsigned int child_size;
225
226 struct block *block;
227 void *backing_store;
228 void (*invalidate_cb) (drm_intel_bo *bo, void *ptr);
229 void *invalidate_ptr;
230 } drm_intel_bo_fake;
231
232 static int clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake,
233 unsigned int fence_cookie);
234
/* Largest fence cookie value; cookies wrap around after this. */
#define MAXFENCE 0x7fffffff

/*
 * Wrap-aware "a is not later than b" test for fence cookies.
 *
 * Returns 1 when the two cookies are equal, or when b is ahead of a by less
 * than 1 << 24 steps, either directly or across the MAXFENCE wrap-around;
 * returns 0 otherwise.
 */
static int
FENCE_LTE(unsigned a, unsigned b)
{
	const unsigned window = 1 << 24;

	if (a == b)
		return 1;

	if (a < b)
		return (b - a) < window;

	/* a > b: b only counts as later if the counter wrapped in between. */
	return (MAXFENCE - a + b) < window;
}
251
252 drm_public void
drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr * bufmgr,unsigned int (* emit)(void * priv),void (* wait)(unsigned int fence,void * priv),void * priv)253 drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr *bufmgr,
254 unsigned int (*emit) (void *priv),
255 void (*wait) (unsigned int fence,
256 void *priv),
257 void *priv)
258 {
259 drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
260
261 bufmgr_fake->fence_emit = emit;
262 bufmgr_fake->fence_wait = wait;
263 bufmgr_fake->fence_priv = priv;
264 }
265
266 static unsigned int
_fence_emit_internal(drm_intel_bufmgr_fake * bufmgr_fake)267 _fence_emit_internal(drm_intel_bufmgr_fake *bufmgr_fake)
268 {
269 struct drm_i915_irq_emit ie;
270 int ret, seq = 1;
271
272 if (bufmgr_fake->fence_emit != NULL) {
273 seq = bufmgr_fake->fence_emit(bufmgr_fake->fence_priv);
274 return seq;
275 }
276
277 ie.irq_seq = &seq;
278 ret = drmCommandWriteRead(bufmgr_fake->fd, DRM_I915_IRQ_EMIT,
279 &ie, sizeof(ie));
280 if (ret) {
281 drmMsg("%s: drm_i915_irq_emit: %d\n", __FUNCTION__, ret);
282 abort();
283 }
284
285 DBG("emit 0x%08x\n", seq);
286 return seq;
287 }
288
289 static void
_fence_wait_internal(drm_intel_bufmgr_fake * bufmgr_fake,int seq)290 _fence_wait_internal(drm_intel_bufmgr_fake *bufmgr_fake, int seq)
291 {
292 struct drm_i915_irq_wait iw;
293 int hw_seq, busy_count = 0;
294 int ret;
295 int kernel_lied;
296
297 if (bufmgr_fake->fence_wait != NULL) {
298 bufmgr_fake->fence_wait(seq, bufmgr_fake->fence_priv);
299 clear_fenced(bufmgr_fake, seq);
300 return;
301 }
302
303 iw.irq_seq = seq;
304
305 DBG("wait 0x%08x\n", iw.irq_seq);
306
307 /* The kernel IRQ_WAIT implementation is all sorts of broken.
308 * 1) It returns 1 to 0x7fffffff instead of using the full 32-bit
309 * unsigned range.
310 * 2) It returns 0 if hw_seq >= seq, not seq - hw_seq < 0 on the 32-bit
311 * signed range.
312 * 3) It waits if seq < hw_seq, not seq - hw_seq > 0 on the 32-bit
313 * signed range.
314 * 4) It returns -EBUSY in 3 seconds even if the hardware is still
315 * successfully chewing through buffers.
316 *
317 * Assume that in userland we treat sequence numbers as ints, which
318 * makes some of the comparisons convenient, since the sequence
319 * numbers are all postive signed integers.
320 *
321 * From this we get several cases we need to handle. Here's a timeline.
322 * 0x2 0x7 0x7ffffff8 0x7ffffffd
323 * | | | |
324 * ------------------------------------------------------------
325 *
326 * A) Normal wait for hw to catch up
327 * hw_seq seq
328 * | |
329 * ------------------------------------------------------------
330 * seq - hw_seq = 5. If we call IRQ_WAIT, it will wait for hw to
331 * catch up.
332 *
333 * B) Normal wait for a sequence number that's already passed.
334 * seq hw_seq
335 * | |
336 * ------------------------------------------------------------
337 * seq - hw_seq = -5. If we call IRQ_WAIT, it returns 0 quickly.
338 *
339 * C) Hardware has already wrapped around ahead of us
340 * hw_seq seq
341 * | |
342 * ------------------------------------------------------------
343 * seq - hw_seq = 0x80000000 - 5. If we called IRQ_WAIT, it would wait
344 * for hw_seq >= seq, which may never occur. Thus, we want to catch
345 * this in userland and return 0.
346 *
347 * D) We've wrapped around ahead of the hardware.
348 * seq hw_seq
349 * | |
350 * ------------------------------------------------------------
351 * seq - hw_seq = -(0x80000000 - 5). If we called IRQ_WAIT, it would
352 * return 0 quickly because hw_seq >= seq, even though the hardware
353 * isn't caught up. Thus, we need to catch this early return in
354 * userland and bother the kernel until the hardware really does
355 * catch up.
356 *
357 * E) Hardware might wrap after we test in userland.
358 * hw_seq seq
359 * | |
360 * ------------------------------------------------------------
361 * seq - hw_seq = 5. If we call IRQ_WAIT, it will likely see seq >=
362 * hw_seq and wait. However, suppose hw_seq wraps before we make it
363 * into the kernel. The kernel sees hw_seq >= seq and waits for 3
364 * seconds then returns -EBUSY. This is case C). We should catch
365 * this and then return successfully.
366 *
367 * F) Hardware might take a long time on a buffer.
368 * hw_seq seq
369 * | |
370 * -------------------------------------------------------------------
371 * seq - hw_seq = 5. If we call IRQ_WAIT, if sequence 2 through 5
372 * take too long, it will return -EBUSY. Batchbuffers in the
373 * gltestperf demo were seen to take up to 7 seconds. We should
374 * catch early -EBUSY return and keep trying.
375 */
376
377 do {
378 /* Keep a copy of last_dispatch so that if the wait -EBUSYs
379 * because the hardware didn't catch up in 3 seconds, we can
380 * see if it at least made progress and retry.
381 */
382 hw_seq = *bufmgr_fake->last_dispatch;
383
384 /* Catch case C */
385 if (seq - hw_seq > 0x40000000)
386 return;
387
388 ret = drmCommandWrite(bufmgr_fake->fd, DRM_I915_IRQ_WAIT,
389 &iw, sizeof(iw));
390 /* Catch case D */
391 kernel_lied = (ret == 0) && (seq - *bufmgr_fake->last_dispatch <
392 -0x40000000);
393
394 /* Catch case E */
395 if (ret == -EBUSY
396 && (seq - *bufmgr_fake->last_dispatch > 0x40000000))
397 ret = 0;
398
399 /* Catch case F: Allow up to 15 seconds chewing on one buffer. */
400 if ((ret == -EBUSY) && (hw_seq != *bufmgr_fake->last_dispatch))
401 busy_count = 0;
402 else
403 busy_count++;
404 } while (kernel_lied || ret == -EAGAIN || ret == -EINTR ||
405 (ret == -EBUSY && busy_count < 5));
406
407 if (ret != 0) {
408 drmMsg("%s:%d: Error waiting for fence: %s.\n", __FILE__,
409 __LINE__, strerror(-ret));
410 abort();
411 }
412 clear_fenced(bufmgr_fake, seq);
413 }
414
415 static int
_fence_test(drm_intel_bufmgr_fake * bufmgr_fake,unsigned fence)416 _fence_test(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
417 {
418 /* Slight problem with wrap-around:
419 */
420 return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence);
421 }
422
423 /**
424 * Allocate a memory manager block for the buffer.
425 */
426 static int
alloc_block(drm_intel_bo * bo)427 alloc_block(drm_intel_bo *bo)
428 {
429 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
430 drm_intel_bufmgr_fake *bufmgr_fake =
431 (drm_intel_bufmgr_fake *) bo->bufmgr;
432 struct block *block = (struct block *)calloc(sizeof *block, 1);
433 unsigned int align_log2 = ffs(bo_fake->alignment) - 1;
434 unsigned int sz;
435
436 if (!block)
437 return 1;
438
439 sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1);
440
441 block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0);
442 if (!block->mem) {
443 free(block);
444 return 0;
445 }
446
447 DRMINITLISTHEAD(block);
448
449 /* Insert at head or at tail??? */
450 DRMLISTADDTAIL(block, &bufmgr_fake->lru);
451
452 block->virtual = (uint8_t *) bufmgr_fake->virtual +
453 block->mem->ofs - bufmgr_fake->low_offset;
454 block->bo = bo;
455
456 bo_fake->block = block;
457
458 return 1;
459 }
460
461 /* Release the card storage associated with buf:
462 */
463 static void
free_block(drm_intel_bufmgr_fake * bufmgr_fake,struct block * block,int skip_dirty_copy)464 free_block(drm_intel_bufmgr_fake *bufmgr_fake, struct block *block,
465 int skip_dirty_copy)
466 {
467 drm_intel_bo_fake *bo_fake;
468 DBG("free block %p %08x %d %d\n", block, block->mem->ofs,
469 block->on_hardware, block->fenced);
470
471 if (!block)
472 return;
473
474 bo_fake = (drm_intel_bo_fake *) block->bo;
475
476 if (bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE))
477 skip_dirty_copy = 1;
478
479 if (!skip_dirty_copy && (bo_fake->card_dirty == 1)) {
480 memcpy(bo_fake->backing_store, block->virtual, block->bo->size);
481 bo_fake->card_dirty = 0;
482 bo_fake->dirty = 1;
483 }
484
485 if (block->on_hardware) {
486 block->bo = NULL;
487 } else if (block->fenced) {
488 block->bo = NULL;
489 } else {
490 DBG(" - free immediately\n");
491 DRMLISTDEL(block);
492
493 mmFreeMem(block->mem);
494 free(block);
495 }
496 }
497
498 static void
alloc_backing_store(drm_intel_bo * bo)499 alloc_backing_store(drm_intel_bo *bo)
500 {
501 drm_intel_bufmgr_fake *bufmgr_fake =
502 (drm_intel_bufmgr_fake *) bo->bufmgr;
503 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
504 assert(!bo_fake->backing_store);
505 assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));
506
507 bo_fake->backing_store = malloc(bo->size);
508
509 DBG("alloc_backing - buf %d %p %lu\n", bo_fake->id,
510 bo_fake->backing_store, bo->size);
511 assert(bo_fake->backing_store);
512 }
513
514 static void
free_backing_store(drm_intel_bo * bo)515 free_backing_store(drm_intel_bo *bo)
516 {
517 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
518
519 if (bo_fake->backing_store) {
520 assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));
521 free(bo_fake->backing_store);
522 bo_fake->backing_store = NULL;
523 }
524 }
525
526 static void
set_dirty(drm_intel_bo * bo)527 set_dirty(drm_intel_bo *bo)
528 {
529 drm_intel_bufmgr_fake *bufmgr_fake =
530 (drm_intel_bufmgr_fake *) bo->bufmgr;
531 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
532
533 if (bo_fake->flags & BM_NO_BACKING_STORE
534 && bo_fake->invalidate_cb != NULL)
535 bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr);
536
537 assert(!(bo_fake->flags & BM_PINNED));
538
539 DBG("set_dirty - buf %d\n", bo_fake->id);
540 bo_fake->dirty = 1;
541 }
542
543 static int
evict_lru(drm_intel_bufmgr_fake * bufmgr_fake,unsigned int max_fence)544 evict_lru(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int max_fence)
545 {
546 struct block *block, *tmp;
547
548 DBG("%s\n", __FUNCTION__);
549
550 DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
551 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
552
553 if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
554 continue;
555
556 if (block->fence && max_fence && !FENCE_LTE(block->fence,
557 max_fence))
558 return 0;
559
560 set_dirty(&bo_fake->bo);
561 bo_fake->block = NULL;
562
563 free_block(bufmgr_fake, block, 0);
564 return 1;
565 }
566
567 return 0;
568 }
569
570 static int
evict_mru(drm_intel_bufmgr_fake * bufmgr_fake)571 evict_mru(drm_intel_bufmgr_fake *bufmgr_fake)
572 {
573 struct block *block, *tmp;
574
575 DBG("%s\n", __FUNCTION__);
576
577 DRMLISTFOREACHSAFEREVERSE(block, tmp, &bufmgr_fake->lru) {
578 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
579
580 if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
581 continue;
582
583 set_dirty(&bo_fake->bo);
584 bo_fake->block = NULL;
585
586 free_block(bufmgr_fake, block, 0);
587 return 1;
588 }
589
590 return 0;
591 }
592
593 /**
594 * Removes all objects from the fenced list older than the given fence.
595 */
596 static int
clear_fenced(drm_intel_bufmgr_fake * bufmgr_fake,unsigned int fence_cookie)597 clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int fence_cookie)
598 {
599 struct block *block, *tmp;
600 int ret = 0;
601
602 bufmgr_fake->last_fence = fence_cookie;
603 DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->fenced) {
604 assert(block->fenced);
605
606 if (_fence_test(bufmgr_fake, block->fence)) {
607
608 block->fenced = 0;
609
610 if (!block->bo) {
611 DBG("delayed free: offset %x sz %x\n",
612 block->mem->ofs, block->mem->size);
613 DRMLISTDEL(block);
614 mmFreeMem(block->mem);
615 free(block);
616 } else {
617 DBG("return to lru: offset %x sz %x\n",
618 block->mem->ofs, block->mem->size);
619 DRMLISTDEL(block);
620 DRMLISTADDTAIL(block, &bufmgr_fake->lru);
621 }
622
623 ret = 1;
624 } else {
625 /* Blocks are ordered by fence, so if one fails, all
626 * from here will fail also:
627 */
628 DBG("fence not passed: offset %x sz %x %d %d \n",
629 block->mem->ofs, block->mem->size, block->fence,
630 bufmgr_fake->last_fence);
631 break;
632 }
633 }
634
635 DBG("%s: %d\n", __FUNCTION__, ret);
636 return ret;
637 }
638
639 static void
fence_blocks(drm_intel_bufmgr_fake * bufmgr_fake,unsigned fence)640 fence_blocks(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
641 {
642 struct block *block, *tmp;
643
644 DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
645 DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n",
646 block, block->mem->size, block->mem->ofs, block->bo, fence);
647 block->fence = fence;
648
649 block->on_hardware = 0;
650 block->fenced = 1;
651
652 /* Move to tail of pending list here
653 */
654 DRMLISTDEL(block);
655 DRMLISTADDTAIL(block, &bufmgr_fake->fenced);
656 }
657
658 assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
659 }
660
661 static int
evict_and_alloc_block(drm_intel_bo * bo)662 evict_and_alloc_block(drm_intel_bo *bo)
663 {
664 drm_intel_bufmgr_fake *bufmgr_fake =
665 (drm_intel_bufmgr_fake *) bo->bufmgr;
666 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
667
668 assert(bo_fake->block == NULL);
669
670 /* Search for already free memory:
671 */
672 if (alloc_block(bo))
673 return 1;
674
675 /* If we're not thrashing, allow lru eviction to dig deeper into
676 * recently used textures. We'll probably be thrashing soon:
677 */
678 if (!bufmgr_fake->thrashing) {
679 while (evict_lru(bufmgr_fake, 0))
680 if (alloc_block(bo))
681 return 1;
682 }
683
684 /* Keep thrashing counter alive?
685 */
686 if (bufmgr_fake->thrashing)
687 bufmgr_fake->thrashing = 20;
688
689 /* Wait on any already pending fences - here we are waiting for any
690 * freed memory that has been submitted to hardware and fenced to
691 * become available:
692 */
693 while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
694 uint32_t fence = bufmgr_fake->fenced.next->fence;
695 _fence_wait_internal(bufmgr_fake, fence);
696
697 if (alloc_block(bo))
698 return 1;
699 }
700
701 if (!DRMLISTEMPTY(&bufmgr_fake->on_hardware)) {
702 while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
703 uint32_t fence = bufmgr_fake->fenced.next->fence;
704 _fence_wait_internal(bufmgr_fake, fence);
705 }
706
707 if (!bufmgr_fake->thrashing) {
708 DBG("thrashing\n");
709 }
710 bufmgr_fake->thrashing = 20;
711
712 if (alloc_block(bo))
713 return 1;
714 }
715
716 while (evict_mru(bufmgr_fake))
717 if (alloc_block(bo))
718 return 1;
719
720 DBG("%s 0x%lx bytes failed\n", __FUNCTION__, bo->size);
721
722 return 0;
723 }
724
725 /***********************************************************************
726 * Public functions
727 */
728
729 /**
730 * Wait for hardware idle by emitting a fence and waiting for it.
731 */
732 static void
drm_intel_bufmgr_fake_wait_idle(drm_intel_bufmgr_fake * bufmgr_fake)733 drm_intel_bufmgr_fake_wait_idle(drm_intel_bufmgr_fake *bufmgr_fake)
734 {
735 unsigned int cookie;
736
737 cookie = _fence_emit_internal(bufmgr_fake);
738 _fence_wait_internal(bufmgr_fake, cookie);
739 }
740
741 /**
742 * Wait for rendering to a buffer to complete.
743 *
744 * It is assumed that the bathcbuffer which performed the rendering included
745 * the necessary flushing.
746 */
747 static void
drm_intel_fake_bo_wait_rendering_locked(drm_intel_bo * bo)748 drm_intel_fake_bo_wait_rendering_locked(drm_intel_bo *bo)
749 {
750 drm_intel_bufmgr_fake *bufmgr_fake =
751 (drm_intel_bufmgr_fake *) bo->bufmgr;
752 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
753
754 if (bo_fake->block == NULL || !bo_fake->block->fenced)
755 return;
756
757 _fence_wait_internal(bufmgr_fake, bo_fake->block->fence);
758 }
759
760 static void
drm_intel_fake_bo_wait_rendering(drm_intel_bo * bo)761 drm_intel_fake_bo_wait_rendering(drm_intel_bo *bo)
762 {
763 drm_intel_bufmgr_fake *bufmgr_fake =
764 (drm_intel_bufmgr_fake *) bo->bufmgr;
765
766 pthread_mutex_lock(&bufmgr_fake->lock);
767 drm_intel_fake_bo_wait_rendering_locked(bo);
768 pthread_mutex_unlock(&bufmgr_fake->lock);
769 }
770
771 /* Specifically ignore texture memory sharing.
772 * -- just evict everything
773 * -- and wait for idle
774 */
775 drm_public void
drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr * bufmgr)776 drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr *bufmgr)
777 {
778 drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
779 struct block *block, *tmp;
780
781 pthread_mutex_lock(&bufmgr_fake->lock);
782
783 bufmgr_fake->need_fence = 1;
784 bufmgr_fake->fail = 0;
785
786 /* Wait for hardware idle. We don't know where acceleration has been
787 * happening, so we'll need to wait anyway before letting anything get
788 * put on the card again.
789 */
790 drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
791
792 /* Check that we hadn't released the lock without having fenced the last
793 * set of buffers.
794 */
795 assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
796 assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
797
798 DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
799 assert(_fence_test(bufmgr_fake, block->fence));
800 set_dirty(block->bo);
801 }
802
803 pthread_mutex_unlock(&bufmgr_fake->lock);
804 }
805
806 static drm_intel_bo *
drm_intel_fake_bo_alloc(drm_intel_bufmgr * bufmgr,const char * name,unsigned long size,unsigned int alignment)807 drm_intel_fake_bo_alloc(drm_intel_bufmgr *bufmgr,
808 const char *name,
809 unsigned long size,
810 unsigned int alignment)
811 {
812 drm_intel_bufmgr_fake *bufmgr_fake;
813 drm_intel_bo_fake *bo_fake;
814
815 bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
816
817 assert(size != 0);
818
819 bo_fake = calloc(1, sizeof(*bo_fake));
820 if (!bo_fake)
821 return NULL;
822
823 bo_fake->bo.size = size;
824 bo_fake->bo.offset = -1;
825 bo_fake->bo.virtual = NULL;
826 bo_fake->bo.bufmgr = bufmgr;
827 bo_fake->refcount = 1;
828
829 /* Alignment must be a power of two */
830 assert((alignment & (alignment - 1)) == 0);
831 if (alignment == 0)
832 alignment = 1;
833 bo_fake->alignment = alignment;
834 bo_fake->id = ++bufmgr_fake->buf_nr;
835 bo_fake->name = name;
836 bo_fake->flags = 0;
837 bo_fake->is_static = 0;
838
839 DBG("drm_bo_alloc: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
840 bo_fake->bo.size / 1024);
841
842 return &bo_fake->bo;
843 }
844
845 static drm_intel_bo *
drm_intel_fake_bo_alloc_tiled(drm_intel_bufmgr * bufmgr,const char * name,int x,int y,int cpp,uint32_t * tiling_mode,unsigned long * pitch,unsigned long flags)846 drm_intel_fake_bo_alloc_tiled(drm_intel_bufmgr * bufmgr,
847 const char *name,
848 int x, int y, int cpp,
849 uint32_t *tiling_mode,
850 unsigned long *pitch,
851 unsigned long flags)
852 {
853 unsigned long stride, aligned_y;
854
855 /* No runtime tiling support for fake. */
856 *tiling_mode = I915_TILING_NONE;
857
858 /* Align it for being a render target. Shouldn't need anything else. */
859 stride = x * cpp;
860 stride = ROUND_UP_TO(stride, 64);
861
862 /* 965 subspan loading alignment */
863 aligned_y = ALIGN(y, 2);
864
865 *pitch = stride;
866
867 return drm_intel_fake_bo_alloc(bufmgr, name, stride * aligned_y,
868 4096);
869 }
870
871 drm_public drm_intel_bo *
drm_intel_bo_fake_alloc_static(drm_intel_bufmgr * bufmgr,const char * name,unsigned long offset,unsigned long size,void * virtual)872 drm_intel_bo_fake_alloc_static(drm_intel_bufmgr *bufmgr,
873 const char *name,
874 unsigned long offset,
875 unsigned long size, void *virtual)
876 {
877 drm_intel_bufmgr_fake *bufmgr_fake;
878 drm_intel_bo_fake *bo_fake;
879
880 bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
881
882 assert(size != 0);
883
884 bo_fake = calloc(1, sizeof(*bo_fake));
885 if (!bo_fake)
886 return NULL;
887
888 bo_fake->bo.size = size;
889 bo_fake->bo.offset = offset;
890 bo_fake->bo.virtual = virtual;
891 bo_fake->bo.bufmgr = bufmgr;
892 bo_fake->refcount = 1;
893 bo_fake->id = ++bufmgr_fake->buf_nr;
894 bo_fake->name = name;
895 bo_fake->flags = BM_PINNED;
896 bo_fake->is_static = 1;
897
898 DBG("drm_bo_alloc_static: (buf %d: %s, %lu kb)\n", bo_fake->id,
899 bo_fake->name, bo_fake->bo.size / 1024);
900
901 return &bo_fake->bo;
902 }
903
904 static void
drm_intel_fake_bo_reference(drm_intel_bo * bo)905 drm_intel_fake_bo_reference(drm_intel_bo *bo)
906 {
907 drm_intel_bufmgr_fake *bufmgr_fake =
908 (drm_intel_bufmgr_fake *) bo->bufmgr;
909 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
910
911 pthread_mutex_lock(&bufmgr_fake->lock);
912 bo_fake->refcount++;
913 pthread_mutex_unlock(&bufmgr_fake->lock);
914 }
915
916 static void
drm_intel_fake_bo_reference_locked(drm_intel_bo * bo)917 drm_intel_fake_bo_reference_locked(drm_intel_bo *bo)
918 {
919 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
920
921 bo_fake->refcount++;
922 }
923
924 static void
drm_intel_fake_bo_unreference_locked(drm_intel_bo * bo)925 drm_intel_fake_bo_unreference_locked(drm_intel_bo *bo)
926 {
927 drm_intel_bufmgr_fake *bufmgr_fake =
928 (drm_intel_bufmgr_fake *) bo->bufmgr;
929 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
930 int i;
931
932 if (--bo_fake->refcount == 0) {
933 assert(bo_fake->map_count == 0);
934 /* No remaining references, so free it */
935 if (bo_fake->block)
936 free_block(bufmgr_fake, bo_fake->block, 1);
937 free_backing_store(bo);
938
939 for (i = 0; i < bo_fake->nr_relocs; i++)
940 drm_intel_fake_bo_unreference_locked(bo_fake->relocs[i].
941 target_buf);
942
943 DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id,
944 bo_fake->name);
945
946 free(bo_fake->relocs);
947 free(bo);
948 }
949 }
950
951 static void
drm_intel_fake_bo_unreference(drm_intel_bo * bo)952 drm_intel_fake_bo_unreference(drm_intel_bo *bo)
953 {
954 drm_intel_bufmgr_fake *bufmgr_fake =
955 (drm_intel_bufmgr_fake *) bo->bufmgr;
956
957 pthread_mutex_lock(&bufmgr_fake->lock);
958 drm_intel_fake_bo_unreference_locked(bo);
959 pthread_mutex_unlock(&bufmgr_fake->lock);
960 }
961
962 /**
963 * Set the buffer as not requiring backing store, and instead get the callback
964 * invoked whenever it would be set dirty.
965 */
966 drm_public void
drm_intel_bo_fake_disable_backing_store(drm_intel_bo * bo,void (* invalidate_cb)(drm_intel_bo * bo,void * ptr),void * ptr)967 drm_intel_bo_fake_disable_backing_store(drm_intel_bo *bo,
968 void (*invalidate_cb) (drm_intel_bo *bo,
969 void *ptr),
970 void *ptr)
971 {
972 drm_intel_bufmgr_fake *bufmgr_fake =
973 (drm_intel_bufmgr_fake *) bo->bufmgr;
974 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
975
976 pthread_mutex_lock(&bufmgr_fake->lock);
977
978 if (bo_fake->backing_store)
979 free_backing_store(bo);
980
981 bo_fake->flags |= BM_NO_BACKING_STORE;
982
983 DBG("disable_backing_store set buf %d dirty\n", bo_fake->id);
984 bo_fake->dirty = 1;
985 bo_fake->invalidate_cb = invalidate_cb;
986 bo_fake->invalidate_ptr = ptr;
987
988 /* Note that it is invalid right from the start. Also note
989 * invalidate_cb is called with the bufmgr locked, so cannot
990 * itself make bufmgr calls.
991 */
992 if (invalidate_cb != NULL)
993 invalidate_cb(bo, ptr);
994
995 pthread_mutex_unlock(&bufmgr_fake->lock);
996 }
997
998 /**
999 * Map a buffer into bo->virtual, allocating either card memory space (If
1000 * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary.
1001 */
1002 static int
drm_intel_fake_bo_map_locked(drm_intel_bo * bo,int write_enable)1003 drm_intel_fake_bo_map_locked(drm_intel_bo *bo, int write_enable)
1004 {
1005 drm_intel_bufmgr_fake *bufmgr_fake =
1006 (drm_intel_bufmgr_fake *) bo->bufmgr;
1007 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1008
1009 /* Static buffers are always mapped. */
1010 if (bo_fake->is_static) {
1011 if (bo_fake->card_dirty) {
1012 drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1013 bo_fake->card_dirty = 0;
1014 }
1015 return 0;
1016 }
1017
1018 /* Allow recursive mapping. Mesa may recursively map buffers with
1019 * nested display loops, and it is used internally in bufmgr_fake
1020 * for relocation.
1021 */
1022 if (bo_fake->map_count++ != 0)
1023 return 0;
1024
1025 {
1026 DBG("drm_bo_map: (buf %d: %s, %lu kb)\n", bo_fake->id,
1027 bo_fake->name, bo_fake->bo.size / 1024);
1028
1029 if (bo->virtual != NULL) {
1030 drmMsg("%s: already mapped\n", __FUNCTION__);
1031 abort();
1032 } else if (bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)) {
1033
1034 if (!bo_fake->block && !evict_and_alloc_block(bo)) {
1035 DBG("%s: alloc failed\n", __FUNCTION__);
1036 bufmgr_fake->fail = 1;
1037 return 1;
1038 } else {
1039 assert(bo_fake->block);
1040 bo_fake->dirty = 0;
1041
1042 if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) &&
1043 bo_fake->block->fenced) {
1044 drm_intel_fake_bo_wait_rendering_locked
1045 (bo);
1046 }
1047
1048 bo->virtual = bo_fake->block->virtual;
1049 }
1050 } else {
1051 if (write_enable)
1052 set_dirty(bo);
1053
1054 if (bo_fake->backing_store == 0)
1055 alloc_backing_store(bo);
1056
1057 if ((bo_fake->card_dirty == 1) && bo_fake->block) {
1058 if (bo_fake->block->fenced)
1059 drm_intel_fake_bo_wait_rendering_locked
1060 (bo);
1061
1062 memcpy(bo_fake->backing_store,
1063 bo_fake->block->virtual,
1064 bo_fake->block->bo->size);
1065 bo_fake->card_dirty = 0;
1066 }
1067
1068 bo->virtual = bo_fake->backing_store;
1069 }
1070 }
1071
1072 return 0;
1073 }
1074
1075 static int
drm_intel_fake_bo_map(drm_intel_bo * bo,int write_enable)1076 drm_intel_fake_bo_map(drm_intel_bo *bo, int write_enable)
1077 {
1078 drm_intel_bufmgr_fake *bufmgr_fake =
1079 (drm_intel_bufmgr_fake *) bo->bufmgr;
1080 int ret;
1081
1082 pthread_mutex_lock(&bufmgr_fake->lock);
1083 ret = drm_intel_fake_bo_map_locked(bo, write_enable);
1084 pthread_mutex_unlock(&bufmgr_fake->lock);
1085
1086 return ret;
1087 }
1088
1089 static int
drm_intel_fake_bo_unmap_locked(drm_intel_bo * bo)1090 drm_intel_fake_bo_unmap_locked(drm_intel_bo *bo)
1091 {
1092 drm_intel_bufmgr_fake *bufmgr_fake =
1093 (drm_intel_bufmgr_fake *) bo->bufmgr;
1094 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1095
1096 /* Static buffers are always mapped. */
1097 if (bo_fake->is_static)
1098 return 0;
1099
1100 assert(bo_fake->map_count != 0);
1101 if (--bo_fake->map_count != 0)
1102 return 0;
1103
1104 DBG("drm_bo_unmap: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
1105 bo_fake->bo.size / 1024);
1106
1107 bo->virtual = NULL;
1108
1109 return 0;
1110 }
1111
/**
 * Unmap a buffer: takes the buffer manager lock, performs the unmap through
 * drm_intel_fake_bo_unmap_locked(), releases the lock and hands back that
 * function's return value.
 */
static int drm_intel_fake_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_fake *fake = (drm_intel_bufmgr_fake *) bo->bufmgr;
	int status;

	pthread_mutex_lock(&fake->lock);
	status = drm_intel_fake_bo_unmap_locked(bo);
	pthread_mutex_unlock(&fake->lock);

	return status;
}
1124
1125 static int
drm_intel_fake_bo_subdata(drm_intel_bo * bo,unsigned long offset,unsigned long size,const void * data)1126 drm_intel_fake_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1127 unsigned long size, const void *data)
1128 {
1129 int ret;
1130
1131 if (size == 0 || data == NULL)
1132 return 0;
1133
1134 ret = drm_intel_bo_map(bo, 1);
1135 if (ret)
1136 return ret;
1137 memcpy((unsigned char *)bo->virtual + offset, data, size);
1138 drm_intel_bo_unmap(bo);
1139 return 0;
1140 }
1141
1142 static void
drm_intel_fake_kick_all_locked(drm_intel_bufmgr_fake * bufmgr_fake)1143 drm_intel_fake_kick_all_locked(drm_intel_bufmgr_fake *bufmgr_fake)
1144 {
1145 struct block *block, *tmp;
1146
1147 bufmgr_fake->performed_rendering = 0;
1148 /* okay for ever BO that is on the HW kick it off.
1149 seriously not afraid of the POLICE right now */
1150 DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
1151 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
1152
1153 block->on_hardware = 0;
1154 free_block(bufmgr_fake, block, 0);
1155 bo_fake->block = NULL;
1156 bo_fake->validated = 0;
1157 if (!(bo_fake->flags & BM_NO_BACKING_STORE))
1158 bo_fake->dirty = 1;
1159 }
1160
1161 }
1162
1163 static int
drm_intel_fake_bo_validate(drm_intel_bo * bo)1164 drm_intel_fake_bo_validate(drm_intel_bo *bo)
1165 {
1166 drm_intel_bufmgr_fake *bufmgr_fake;
1167 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1168
1169 bufmgr_fake = (drm_intel_bufmgr_fake *) bo->bufmgr;
1170
1171 DBG("drm_bo_validate: (buf %d: %s, %lu kb)\n", bo_fake->id,
1172 bo_fake->name, bo_fake->bo.size / 1024);
1173
1174 /* Sanity check: Buffers should be unmapped before being validated.
1175 * This is not so much of a problem for bufmgr_fake, but TTM refuses,
1176 * and the problem is harder to debug there.
1177 */
1178 assert(bo_fake->map_count == 0);
1179
1180 if (bo_fake->is_static) {
1181 /* Add it to the needs-fence list */
1182 bufmgr_fake->need_fence = 1;
1183 return 0;
1184 }
1185
1186 /* Allocate the card memory */
1187 if (!bo_fake->block && !evict_and_alloc_block(bo)) {
1188 bufmgr_fake->fail = 1;
1189 DBG("Failed to validate buf %d:%s\n", bo_fake->id,
1190 bo_fake->name);
1191 return -1;
1192 }
1193
1194 assert(bo_fake->block);
1195 assert(bo_fake->block->bo == &bo_fake->bo);
1196
1197 bo->offset = bo_fake->block->mem->ofs;
1198
1199 /* Upload the buffer contents if necessary */
1200 if (bo_fake->dirty) {
1201 DBG("Upload dirty buf %d:%s, sz %lu offset 0x%x\n", bo_fake->id,
1202 bo_fake->name, bo->size, bo_fake->block->mem->ofs);
1203
1204 assert(!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)));
1205
1206 /* Actually, should be able to just wait for a fence on the
1207 * mmory, hich we would be tracking when we free it. Waiting
1208 * for idle is a sufficiently large hammer for now.
1209 */
1210 drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1211
1212 /* we may never have mapped this BO so it might not have any
1213 * backing store if this happens it should be rare, but 0 the
1214 * card memory in any case */
1215 if (bo_fake->backing_store)
1216 memcpy(bo_fake->block->virtual, bo_fake->backing_store,
1217 bo->size);
1218 else
1219 memset(bo_fake->block->virtual, 0, bo->size);
1220
1221 bo_fake->dirty = 0;
1222 }
1223
1224 bo_fake->block->fenced = 0;
1225 bo_fake->block->on_hardware = 1;
1226 DRMLISTDEL(bo_fake->block);
1227 DRMLISTADDTAIL(bo_fake->block, &bufmgr_fake->on_hardware);
1228
1229 bo_fake->validated = 1;
1230 bufmgr_fake->need_fence = 1;
1231
1232 return 0;
1233 }
1234
1235 static void
drm_intel_fake_fence_validated(drm_intel_bufmgr * bufmgr)1236 drm_intel_fake_fence_validated(drm_intel_bufmgr *bufmgr)
1237 {
1238 drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1239 unsigned int cookie;
1240
1241 cookie = _fence_emit_internal(bufmgr_fake);
1242 fence_blocks(bufmgr_fake, cookie);
1243
1244 DBG("drm_fence_validated: 0x%08x cookie\n", cookie);
1245 }
1246
1247 static void
drm_intel_fake_destroy(drm_intel_bufmgr * bufmgr)1248 drm_intel_fake_destroy(drm_intel_bufmgr *bufmgr)
1249 {
1250 drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1251
1252 pthread_mutex_destroy(&bufmgr_fake->lock);
1253 mmDestroy(bufmgr_fake->heap);
1254 free(bufmgr);
1255 }
1256
1257 static int
drm_intel_fake_emit_reloc(drm_intel_bo * bo,uint32_t offset,drm_intel_bo * target_bo,uint32_t target_offset,uint32_t read_domains,uint32_t write_domain)1258 drm_intel_fake_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1259 drm_intel_bo *target_bo, uint32_t target_offset,
1260 uint32_t read_domains, uint32_t write_domain)
1261 {
1262 drm_intel_bufmgr_fake *bufmgr_fake =
1263 (drm_intel_bufmgr_fake *) bo->bufmgr;
1264 struct fake_buffer_reloc *r;
1265 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1266 drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *) target_bo;
1267 int i;
1268
1269 pthread_mutex_lock(&bufmgr_fake->lock);
1270
1271 assert(bo);
1272 assert(target_bo);
1273
1274 if (bo_fake->relocs == NULL) {
1275 bo_fake->relocs =
1276 malloc(sizeof(struct fake_buffer_reloc) * MAX_RELOCS);
1277 }
1278
1279 r = &bo_fake->relocs[bo_fake->nr_relocs++];
1280
1281 assert(bo_fake->nr_relocs <= MAX_RELOCS);
1282
1283 drm_intel_fake_bo_reference_locked(target_bo);
1284
1285 if (!target_fake->is_static) {
1286 bo_fake->child_size +=
1287 ALIGN(target_bo->size, target_fake->alignment);
1288 bo_fake->child_size += target_fake->child_size;
1289 }
1290 r->target_buf = target_bo;
1291 r->offset = offset;
1292 r->last_target_offset = target_bo->offset;
1293 r->delta = target_offset;
1294 r->read_domains = read_domains;
1295 r->write_domain = write_domain;
1296
1297 if (bufmgr_fake->debug) {
1298 /* Check that a conflicting relocation hasn't already been
1299 * emitted.
1300 */
1301 for (i = 0; i < bo_fake->nr_relocs - 1; i++) {
1302 struct fake_buffer_reloc *r2 = &bo_fake->relocs[i];
1303
1304 assert(r->offset != r2->offset);
1305 }
1306 }
1307
1308 pthread_mutex_unlock(&bufmgr_fake->lock);
1309
1310 return 0;
1311 }
1312
1313 /**
1314 * Incorporates the validation flags associated with each relocation into
1315 * the combined validation flags for the buffer on this batchbuffer submission.
1316 */
1317 static void
drm_intel_fake_calculate_domains(drm_intel_bo * bo)1318 drm_intel_fake_calculate_domains(drm_intel_bo *bo)
1319 {
1320 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1321 int i;
1322
1323 for (i = 0; i < bo_fake->nr_relocs; i++) {
1324 struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1325 drm_intel_bo_fake *target_fake =
1326 (drm_intel_bo_fake *) r->target_buf;
1327
1328 /* Do the same for the tree of buffers we depend on */
1329 drm_intel_fake_calculate_domains(r->target_buf);
1330
1331 target_fake->read_domains |= r->read_domains;
1332 target_fake->write_domain |= r->write_domain;
1333 }
1334 }
1335
1336 static int
drm_intel_fake_reloc_and_validate_buffer(drm_intel_bo * bo)1337 drm_intel_fake_reloc_and_validate_buffer(drm_intel_bo *bo)
1338 {
1339 drm_intel_bufmgr_fake *bufmgr_fake =
1340 (drm_intel_bufmgr_fake *) bo->bufmgr;
1341 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1342 int i, ret;
1343
1344 assert(bo_fake->map_count == 0);
1345
1346 for (i = 0; i < bo_fake->nr_relocs; i++) {
1347 struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1348 drm_intel_bo_fake *target_fake =
1349 (drm_intel_bo_fake *) r->target_buf;
1350 uint32_t reloc_data;
1351
1352 /* Validate the target buffer if that hasn't been done. */
1353 if (!target_fake->validated) {
1354 ret =
1355 drm_intel_fake_reloc_and_validate_buffer(r->target_buf);
1356 if (ret != 0) {
1357 if (bo->virtual != NULL)
1358 drm_intel_fake_bo_unmap_locked(bo);
1359 return ret;
1360 }
1361 }
1362
1363 /* Calculate the value of the relocation entry. */
1364 if (r->target_buf->offset != r->last_target_offset) {
1365 reloc_data = r->target_buf->offset + r->delta;
1366
1367 if (bo->virtual == NULL)
1368 drm_intel_fake_bo_map_locked(bo, 1);
1369
1370 *(uint32_t *) ((uint8_t *) bo->virtual + r->offset) =
1371 reloc_data;
1372
1373 r->last_target_offset = r->target_buf->offset;
1374 }
1375 }
1376
1377 if (bo->virtual != NULL)
1378 drm_intel_fake_bo_unmap_locked(bo);
1379
1380 if (bo_fake->write_domain != 0) {
1381 if (!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED))) {
1382 if (bo_fake->backing_store == 0)
1383 alloc_backing_store(bo);
1384 }
1385 bo_fake->card_dirty = 1;
1386 bufmgr_fake->performed_rendering = 1;
1387 }
1388
1389 return drm_intel_fake_bo_validate(bo);
1390 }
1391
1392 static void
drm_intel_bo_fake_post_submit(drm_intel_bo * bo)1393 drm_intel_bo_fake_post_submit(drm_intel_bo *bo)
1394 {
1395 drm_intel_bufmgr_fake *bufmgr_fake =
1396 (drm_intel_bufmgr_fake *) bo->bufmgr;
1397 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1398 int i;
1399
1400 for (i = 0; i < bo_fake->nr_relocs; i++) {
1401 struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1402 drm_intel_bo_fake *target_fake =
1403 (drm_intel_bo_fake *) r->target_buf;
1404
1405 if (target_fake->validated)
1406 drm_intel_bo_fake_post_submit(r->target_buf);
1407
1408 DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n",
1409 bo_fake->name, (uint32_t) bo->offset, r->offset,
1410 target_fake->name, (uint32_t) r->target_buf->offset,
1411 r->delta);
1412 }
1413
1414 assert(bo_fake->map_count == 0);
1415 bo_fake->validated = 0;
1416 bo_fake->read_domains = 0;
1417 bo_fake->write_domain = 0;
1418 }
1419
1420 drm_public void
drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr * bufmgr,int (* exec)(drm_intel_bo * bo,unsigned int used,void * priv),void * priv)1421 drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr *bufmgr,
1422 int (*exec) (drm_intel_bo *bo,
1423 unsigned int used,
1424 void *priv),
1425 void *priv)
1426 {
1427 drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1428
1429 bufmgr_fake->exec = exec;
1430 bufmgr_fake->exec_priv = priv;
1431 }
1432
1433 static int
drm_intel_fake_bo_exec(drm_intel_bo * bo,int used,drm_clip_rect_t * cliprects,int num_cliprects,int DR4)1434 drm_intel_fake_bo_exec(drm_intel_bo *bo, int used,
1435 drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
1436 {
1437 drm_intel_bufmgr_fake *bufmgr_fake =
1438 (drm_intel_bufmgr_fake *) bo->bufmgr;
1439 drm_intel_bo_fake *batch_fake = (drm_intel_bo_fake *) bo;
1440 struct drm_i915_batchbuffer batch;
1441 int ret;
1442 int retry_count = 0;
1443
1444 pthread_mutex_lock(&bufmgr_fake->lock);
1445
1446 bufmgr_fake->performed_rendering = 0;
1447
1448 drm_intel_fake_calculate_domains(bo);
1449
1450 batch_fake->read_domains = I915_GEM_DOMAIN_COMMAND;
1451
1452 /* we've ran out of RAM so blow the whole lot away and retry */
1453 restart:
1454 ret = drm_intel_fake_reloc_and_validate_buffer(bo);
1455 if (bufmgr_fake->fail == 1) {
1456 if (retry_count == 0) {
1457 retry_count++;
1458 drm_intel_fake_kick_all_locked(bufmgr_fake);
1459 bufmgr_fake->fail = 0;
1460 goto restart;
1461 } else /* dump out the memory here */
1462 mmDumpMemInfo(bufmgr_fake->heap);
1463 }
1464
1465 assert(ret == 0);
1466
1467 if (bufmgr_fake->exec != NULL) {
1468 int ret = bufmgr_fake->exec(bo, used, bufmgr_fake->exec_priv);
1469 if (ret != 0) {
1470 pthread_mutex_unlock(&bufmgr_fake->lock);
1471 return ret;
1472 }
1473 } else {
1474 batch.start = bo->offset;
1475 batch.used = used;
1476 batch.cliprects = cliprects;
1477 batch.num_cliprects = num_cliprects;
1478 batch.DR1 = 0;
1479 batch.DR4 = DR4;
1480
1481 if (drmCommandWrite
1482 (bufmgr_fake->fd, DRM_I915_BATCHBUFFER, &batch,
1483 sizeof(batch))) {
1484 drmMsg("DRM_I915_BATCHBUFFER: %d\n", -errno);
1485 pthread_mutex_unlock(&bufmgr_fake->lock);
1486 return -errno;
1487 }
1488 }
1489
1490 drm_intel_fake_fence_validated(bo->bufmgr);
1491
1492 drm_intel_bo_fake_post_submit(bo);
1493
1494 pthread_mutex_unlock(&bufmgr_fake->lock);
1495
1496 return 0;
1497 }
1498
1499 /**
1500 * Return an error if the list of BOs will exceed the aperture size.
1501 *
1502 * This is a rough guess and likely to fail, as during the validate sequence we
1503 * may place a buffer in an inopportune spot early on and then fail to fit
1504 * a set smaller than the aperture.
1505 */
1506 static int
drm_intel_fake_check_aperture_space(drm_intel_bo ** bo_array,int count)1507 drm_intel_fake_check_aperture_space(drm_intel_bo ** bo_array, int count)
1508 {
1509 drm_intel_bufmgr_fake *bufmgr_fake =
1510 (drm_intel_bufmgr_fake *) bo_array[0]->bufmgr;
1511 unsigned int sz = 0;
1512 int i;
1513
1514 for (i = 0; i < count; i++) {
1515 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo_array[i];
1516
1517 if (bo_fake == NULL)
1518 continue;
1519
1520 if (!bo_fake->is_static)
1521 sz += ALIGN(bo_array[i]->size, bo_fake->alignment);
1522 sz += bo_fake->child_size;
1523 }
1524
1525 if (sz > bufmgr_fake->size) {
1526 DBG("check_space: overflowed bufmgr size, %ukb vs %lukb\n",
1527 sz / 1024, bufmgr_fake->size / 1024);
1528 return -1;
1529 }
1530
1531 DBG("drm_check_space: sz %ukb vs bufgr %lukb\n", sz / 1024,
1532 bufmgr_fake->size / 1024);
1533 return 0;
1534 }
1535
1536 /**
1537 * Evicts all buffers, waiting for fences to pass and copying contents out
1538 * as necessary.
1539 *
1540 * Used by the X Server on LeaveVT, when the card memory is no longer our
1541 * own.
1542 */
1543 drm_public void
drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr * bufmgr)1544 drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr *bufmgr)
1545 {
1546 drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1547 struct block *block, *tmp;
1548
1549 pthread_mutex_lock(&bufmgr_fake->lock);
1550
1551 bufmgr_fake->need_fence = 1;
1552 bufmgr_fake->fail = 0;
1553
1554 /* Wait for hardware idle. We don't know where acceleration has been
1555 * happening, so we'll need to wait anyway before letting anything get
1556 * put on the card again.
1557 */
1558 drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1559
1560 /* Check that we hadn't released the lock without having fenced the last
1561 * set of buffers.
1562 */
1563 assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
1564 assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
1565
1566 DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
1567 drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
1568 /* Releases the memory, and memcpys dirty contents out if
1569 * necessary.
1570 */
1571 free_block(bufmgr_fake, block, 0);
1572 bo_fake->block = NULL;
1573 }
1574
1575 pthread_mutex_unlock(&bufmgr_fake->lock);
1576 }
1577
1578 drm_public void
drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr * bufmgr,volatile unsigned int * last_dispatch)1579 drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr *bufmgr,
1580 volatile unsigned int
1581 *last_dispatch)
1582 {
1583 drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1584
1585 bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
1586 }
1587
1588 drm_public drm_intel_bufmgr *
drm_intel_bufmgr_fake_init(int fd,unsigned long low_offset,void * low_virtual,unsigned long size,volatile unsigned int * last_dispatch)1589 drm_intel_bufmgr_fake_init(int fd, unsigned long low_offset,
1590 void *low_virtual, unsigned long size,
1591 volatile unsigned int *last_dispatch)
1592 {
1593 drm_intel_bufmgr_fake *bufmgr_fake;
1594
1595 bufmgr_fake = calloc(1, sizeof(*bufmgr_fake));
1596
1597 if (pthread_mutex_init(&bufmgr_fake->lock, NULL) != 0) {
1598 free(bufmgr_fake);
1599 return NULL;
1600 }
1601
1602 /* Initialize allocator */
1603 DRMINITLISTHEAD(&bufmgr_fake->fenced);
1604 DRMINITLISTHEAD(&bufmgr_fake->on_hardware);
1605 DRMINITLISTHEAD(&bufmgr_fake->lru);
1606
1607 bufmgr_fake->low_offset = low_offset;
1608 bufmgr_fake->virtual = low_virtual;
1609 bufmgr_fake->size = size;
1610 bufmgr_fake->heap = mmInit(low_offset, size);
1611
1612 /* Hook in methods */
1613 bufmgr_fake->bufmgr.bo_alloc = drm_intel_fake_bo_alloc;
1614 bufmgr_fake->bufmgr.bo_alloc_for_render = drm_intel_fake_bo_alloc;
1615 bufmgr_fake->bufmgr.bo_alloc_tiled = drm_intel_fake_bo_alloc_tiled;
1616 bufmgr_fake->bufmgr.bo_reference = drm_intel_fake_bo_reference;
1617 bufmgr_fake->bufmgr.bo_unreference = drm_intel_fake_bo_unreference;
1618 bufmgr_fake->bufmgr.bo_map = drm_intel_fake_bo_map;
1619 bufmgr_fake->bufmgr.bo_unmap = drm_intel_fake_bo_unmap;
1620 bufmgr_fake->bufmgr.bo_subdata = drm_intel_fake_bo_subdata;
1621 bufmgr_fake->bufmgr.bo_wait_rendering =
1622 drm_intel_fake_bo_wait_rendering;
1623 bufmgr_fake->bufmgr.bo_emit_reloc = drm_intel_fake_emit_reloc;
1624 bufmgr_fake->bufmgr.destroy = drm_intel_fake_destroy;
1625 bufmgr_fake->bufmgr.bo_exec = drm_intel_fake_bo_exec;
1626 bufmgr_fake->bufmgr.check_aperture_space =
1627 drm_intel_fake_check_aperture_space;
1628 bufmgr_fake->bufmgr.debug = 0;
1629
1630 bufmgr_fake->fd = fd;
1631 bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
1632
1633 return &bufmgr_fake->bufmgr;
1634 }
1635