1 /**************************************************************************
2  *
3  * Copyright © 2007 Red Hat Inc.
4  * Copyright © 2007 Intel Corporation
5  * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6  * All Rights Reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the
10  * "Software"), to deal in the Software without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sub license, and/or sell copies of the Software, and to
13  * permit persons to whom the Software is furnished to do so, subject to
14  * the following conditions:
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * The above copyright notice and this permission notice (including the
25  * next paragraph) shall be included in all copies or substantial portions
26  * of the Software.
27  *
28  *
29  **************************************************************************/
30 /*
31  * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32  *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33  *	    Eric Anholt <eric@anholt.net>
34  *	    Dave Airlie <airlied@linux.ie>
35  */
36 
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40 
41 #include <xf86drm.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <assert.h>
48 #include <pthread.h>
49 #include <sys/ioctl.h>
50 #include <sys/mman.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 
54 #include <errno.h>
55 #include "libdrm_lists.h"
56 #include "intel_bufmgr.h"
57 #include "intel_bufmgr_priv.h"
58 #include "intel_chipset.h"
59 #include <time.h>
60 
61 #include "i915_drm.h"
62 
63 #define DBG(...) do {					\
64    if (bufmgr_gem->bufmgr.debug)			\
65       fprintf(stderr, __VA_ARGS__);			\
66 } while (0)
67 
68 typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
69 
70 struct drm_intel_gem_bo_bucket {
71    drmMMListHead head;
72    unsigned long size;
73 };
74 
75 /* Only cache objects up to 32MB (the largest bucket size).  Bigger than that,
76  * and the rounding of the size makes many operations fail that wouldn't
77  * otherwise.
 */
78 #define DRM_INTEL_GEM_BO_BUCKETS	14
79 typedef struct _drm_intel_bufmgr_gem {
80     drm_intel_bufmgr bufmgr;
81 
82     int fd;
83 
84     int max_relocs;
85 
86     pthread_mutex_t lock;
87 
88     struct drm_i915_gem_exec_object *exec_objects;
89     drm_intel_bo **exec_bos;
90     int exec_size;
91     int exec_count;
92 
93     /** Array of lists of cached gem objects of power-of-two sizes */
94     struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];
95 
96     uint64_t gtt_size;
97     int available_fences;
98     int pci_device;
99     char bo_reuse;
100 } drm_intel_bufmgr_gem;
101 
102 struct _drm_intel_bo_gem {
103     drm_intel_bo bo;
104 
105     int refcount;
106     /** GEM handle for this buffer object, as returned by the kernel. */
107     uint32_t gem_handle;
108     const char *name;
109 
110     /**
111      * Kernel-assigned global name for this object
112      */
113     unsigned int global_name;
114 
115     /**
116      * Index of the buffer within the validation list while preparing a
117      * batchbuffer execution.
118      */
119     int validate_index;
120 
121     /**
122      * Current tiling mode
123      */
124     uint32_t tiling_mode;
125     uint32_t swizzle_mode;
126 
127     time_t free_time;
128 
129     /** Array passed to the DRM containing relocation information. */
130     struct drm_i915_gem_relocation_entry *relocs;
131     /** Array of bos corresponding to relocs[i].target_handle */
132     drm_intel_bo **reloc_target_bo;
133     /** Number of entries in relocs */
134     int reloc_count;
135     /** Mapped address for the buffer, saved across map/unmap cycles */
136     void *mem_virtual;
137     /** GTT virtual address for the buffer, saved across map/unmap cycles */
138     void *gtt_virtual;
139 
140     /** BO cache list */
141     drmMMListHead head;
142 
143     /**
144      * Boolean of whether this BO and its children have been included in
145      * the current drm_intel_bufmgr_check_aperture_space() total.
146      */
147     char included_in_check_aperture;
148 
149     /**
150      * Boolean of whether this buffer has been used as a relocation
151      * target and had its size accounted for, and thus can't have any
152      * further relocations added to it.
153      */
154      char used_as_reloc_target;
155 
156     /**
157      * Boolean of whether this buffer can be re-used
158      */
159     char reusable;
160 
161     /**
162      * Size in bytes of this buffer and its relocation descendants.
163      *
164      * Used to avoid costly tree walking in drm_intel_bufmgr_check_aperture in
165      * the common case.
166      */
167     int reloc_tree_size;
168     /**
169      * Number of potential fence registers required by this buffer and its
170      * relocations.
171      */
172     int reloc_tree_fences;
173 };
174 
175 static void drm_intel_gem_bo_reference_locked(drm_intel_bo *bo);
176 
177 static unsigned int
178 drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count);
179 
180 static unsigned int
181 drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count);
182 
183 static int
184 drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
185 			    uint32_t *swizzle_mode);
186 
187 static int
188 drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
189 			    uint32_t stride);
190 
191 static void
192 drm_intel_gem_bo_unreference(drm_intel_bo *bo);
193 
194 static struct drm_intel_gem_bo_bucket *
195 drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
196 				 unsigned long size)
197 {
198     int i;
199 
200     for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
201 	struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
202 	if (bucket->size >= size) {
203 	    return bucket;
204 	}
205     }
206 
207     return NULL;
208 }
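
/*
 * Example: with the bucket table set up in drm_intel_bufmgr_gem_init()
 * (power-of-two sizes from 4KB up to 32MB), a request maps to the smallest
 * bucket that fits it:
 *
 *     drm_intel_gem_bo_bucket_for_size(bufmgr_gem, 150 * 1024);  // 256KB bucket
 *
 * Requests larger than the biggest bucket return NULL and are never cached.
 */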
209 
210 static void drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
211 {
212     int i, j;
213 
214     for (i = 0; i < bufmgr_gem->exec_count; i++) {
215 	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
216 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
217 
218 	if (bo_gem->relocs == NULL) {
219 	    DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, bo_gem->name);
220 	    continue;
221 	}
222 
223 	for (j = 0; j < bo_gem->reloc_count; j++) {
224 	    drm_intel_bo *target_bo = bo_gem->reloc_target_bo[j];
225 	    drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *)target_bo;
226 
227 	    DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n",
228 		i,
229 		bo_gem->gem_handle, bo_gem->name,
230 		(unsigned long long)bo_gem->relocs[j].offset,
231 		target_gem->gem_handle, target_gem->name, target_bo->offset,
232 		bo_gem->relocs[j].delta);
233 	}
234     }
235 }
236 
237 /**
238  * Adds the given buffer to the list of buffers to be validated (moved into the
239  * appropriate memory type) with the next batch submission.
240  *
241  * If a buffer is validated multiple times in a batch submission, it ends up
242  * with the intersection of the memory type flags and the union of the
243  * access flags.
244  */
245 static void
246 drm_intel_add_validate_buffer(drm_intel_bo *bo)
247 {
248     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
249     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
250     int index;
251 
252     if (bo_gem->validate_index != -1)
253 	return;
254 
255     /* Extend the array of validation entries as necessary. */
256     if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
257 	int new_size = bufmgr_gem->exec_size * 2;
258 
259 	if (new_size == 0)
260 	    new_size = 5;
261 
262 	bufmgr_gem->exec_objects =
263 	    realloc(bufmgr_gem->exec_objects,
264 		    sizeof(*bufmgr_gem->exec_objects) * new_size);
265 	bufmgr_gem->exec_bos =
266 	    realloc(bufmgr_gem->exec_bos,
267 		    sizeof(*bufmgr_gem->exec_bos) * new_size);
268 	bufmgr_gem->exec_size = new_size;
269     }
270 
271     index = bufmgr_gem->exec_count;
272     bo_gem->validate_index = index;
273     /* Fill in array entry */
274     bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
275     bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
276     bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
277     bufmgr_gem->exec_objects[index].alignment = 0;
278     bufmgr_gem->exec_objects[index].offset = 0;
279     bufmgr_gem->exec_bos[index] = bo;
280     drm_intel_gem_bo_reference_locked(bo);
281     bufmgr_gem->exec_count++;
282 }
283 
284 
285 #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
286 	sizeof(uint32_t))
287 
288 static int
289 drm_intel_setup_reloc_list(drm_intel_bo *bo)
290 {
291     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
292     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
293 
294     bo_gem->relocs = malloc(bufmgr_gem->max_relocs *
295 			    sizeof(struct drm_i915_gem_relocation_entry));
296     bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs *
297 				     sizeof(drm_intel_bo *));
    /* Don't hand back a half-initialized reloc list if either allocation
     * failed.
     */
    if (bo_gem->relocs == NULL || bo_gem->reloc_target_bo == NULL) {
	free(bo_gem->relocs);
	free(bo_gem->reloc_target_bo);
	bo_gem->relocs = NULL;
	bo_gem->reloc_target_bo = NULL;
	return -ENOMEM;
    }
298 
299     return 0;
300 }
301 
302 static int
303 drm_intel_gem_bo_busy(drm_intel_bo *bo)
304 {
305     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
306     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
307     struct drm_i915_gem_busy busy;
308     int ret;
309 
310     memset(&busy, 0, sizeof(busy));
311     busy.handle = bo_gem->gem_handle;
312 
313     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
314 
315     return (ret == 0 && busy.busy);
316 }
317 
318 static drm_intel_bo *
319 drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
320 				unsigned long size, unsigned int alignment,
321 				int for_render)
322 {
323     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
324     drm_intel_bo_gem *bo_gem;
325     unsigned int page_size = getpagesize();
326     int ret;
327     struct drm_intel_gem_bo_bucket *bucket;
328     int alloc_from_cache = 0;
329     unsigned long bo_size;
330 
331     /* Round the allocated size up to a power of two number of pages. */
332     bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
333 
334     /* If we don't have caching at this size, don't actually round the
335      * allocation up.
336      */
337     if (bucket == NULL) {
338 	bo_size = size;
339 	if (bo_size < page_size)
340 	    bo_size = page_size;
341     } else {
342 	bo_size = bucket->size;
343     }
344 
345     pthread_mutex_lock(&bufmgr_gem->lock);
346     /* Get a buffer out of the cache if available */
347     if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
348 	if (for_render) {
349 	    /* Allocate new render-target BOs from the tail (MRU)
350 	     * of the list, as it will likely be hot in the GPU cache
351 	     * and in the aperture for us.
352 	     */
353 	    bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.prev, head);
354 	    DRMLISTDEL(&bo_gem->head);
355 	    alloc_from_cache = 1;
356 	} else {
357 	    /* For non-render-target BOs (where we're probably going to map it
358 	     * first thing in order to fill it with data), check if the
359 	     * last BO in the cache is unbusy, and only reuse in that case.
360 	     * Otherwise, allocating a new buffer is probably faster than
361 	     * waiting for the GPU to finish.
362 	     */
363 	    bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
364 
365 	    if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
366 		alloc_from_cache = 1;
367 		DRMLISTDEL(&bo_gem->head);
368 	    }
369 	}
370     }
371     pthread_mutex_unlock(&bufmgr_gem->lock);
372 
373     if (!alloc_from_cache) {
374 	struct drm_i915_gem_create create;
375 
376 	bo_gem = calloc(1, sizeof(*bo_gem));
377 	if (!bo_gem)
378 	    return NULL;
379 
380 	bo_gem->bo.size = bo_size;
381 	memset(&create, 0, sizeof(create));
382 	create.size = bo_size;
383 
384 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
385 	bo_gem->gem_handle = create.handle;
386 	bo_gem->bo.handle = bo_gem->gem_handle;
387 	if (ret != 0) {
388 	    free(bo_gem);
389 	    return NULL;
390 	}
391 	bo_gem->bo.bufmgr = bufmgr;
392     }
393 
394     bo_gem->name = name;
395     bo_gem->refcount = 1;
396     bo_gem->validate_index = -1;
397     bo_gem->reloc_tree_size = bo_gem->bo.size;
398     bo_gem->reloc_tree_fences = 0;
399     bo_gem->used_as_reloc_target = 0;
400     bo_gem->tiling_mode = I915_TILING_NONE;
401     bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
402     bo_gem->reusable = 1;
403 
404     DBG("bo_create: buf %d (%s) %ldb\n",
405 	bo_gem->gem_handle, bo_gem->name, size);
406 
407     return &bo_gem->bo;
408 }
409 
410 static drm_intel_bo *
411 drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
412 				  unsigned long size, unsigned int alignment)
413 {
414     return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 1);
415 }
416 
417 static drm_intel_bo *
418 drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
419 		       unsigned long size, unsigned int alignment)
420 {
421     return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 0);
422 }
423 
424 /**
425  * Returns a drm_intel_bo wrapping the given buffer object handle.
426  *
427  * This can be used when one application needs to pass a buffer object
428  * to another.
429  */
430 drm_intel_bo *
431 drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, const char *name,
432 				  unsigned int handle)
433 {
434     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
435     drm_intel_bo_gem *bo_gem;
436     int ret;
437     struct drm_gem_open open_arg;
438     struct drm_i915_gem_get_tiling get_tiling;
439 
440     bo_gem = calloc(1, sizeof(*bo_gem));
441     if (!bo_gem)
442 	return NULL;
443 
444     memset(&open_arg, 0, sizeof(open_arg));
445     open_arg.name = handle;
446     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
447     if (ret != 0) {
448 	fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
449 	       name, handle, strerror(errno));
450 	free(bo_gem);
451 	return NULL;
452     }
453     bo_gem->bo.size = open_arg.size;
454     bo_gem->bo.offset = 0;
455     bo_gem->bo.virtual = NULL;
456     bo_gem->bo.bufmgr = bufmgr;
457     bo_gem->name = name;
458     bo_gem->refcount = 1;
459     bo_gem->validate_index = -1;
460     bo_gem->gem_handle = open_arg.handle;
461     bo_gem->global_name = handle;
462     bo_gem->reusable = 0;
463 
464     memset(&get_tiling, 0, sizeof(get_tiling));
465     get_tiling.handle = bo_gem->gem_handle;
466     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
467     if (ret != 0) {
468 	drm_intel_gem_bo_unreference(&bo_gem->bo);
469 	return NULL;
470     }
471     bo_gem->tiling_mode = get_tiling.tiling_mode;
472     bo_gem->swizzle_mode = get_tiling.swizzle_mode;
473     if (bo_gem->tiling_mode == I915_TILING_NONE)
474 	bo_gem->reloc_tree_fences = 0;
475     else
476 	bo_gem->reloc_tree_fences = 1;
477 
478     DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
479 
480     return &bo_gem->bo;
481 }
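
/*
 * Usage sketch for sharing a buffer between two processes, assuming the
 * wrappers declared in intel_bufmgr.h.  The exporting side publishes a global
 * name with drm_intel_bo_flink(); the importing side opens it here:
 *
 *     // exporter
 *     uint32_t name;
 *     drm_intel_bo_flink(bo, &name);           // pass "name" to the peer
 *
 *     // importer
 *     drm_intel_bo *shared =
 *         drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 *
 * Buffers opened this way are marked non-reusable, so they are freed rather
 * than returned to the cache on their last unreference.
 */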
482 
483 static void
484 drm_intel_gem_bo_reference(drm_intel_bo *bo)
485 {
486     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
487     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
488 
489     assert(bo_gem->refcount > 0);
490     pthread_mutex_lock(&bufmgr_gem->lock);
491     bo_gem->refcount++;
492     pthread_mutex_unlock(&bufmgr_gem->lock);
493 }
494 
495 static void
496 drm_intel_gem_bo_reference_locked(drm_intel_bo *bo)
497 {
498     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
499 
500     assert(bo_gem->refcount > 0);
501     bo_gem->refcount++;
502 }
503 
504 static void
505 drm_intel_gem_bo_free(drm_intel_bo *bo)
506 {
507     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
508     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
509     struct drm_gem_close close;
510     int ret;
511 
512     if (bo_gem->mem_virtual)
513 	munmap (bo_gem->mem_virtual, bo_gem->bo.size);
514     if (bo_gem->gtt_virtual)
515 	munmap (bo_gem->gtt_virtual, bo_gem->bo.size);
516 
517     /* Close this object */
518     memset(&close, 0, sizeof(close));
519     close.handle = bo_gem->gem_handle;
520     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
521     if (ret != 0) {
522 	fprintf(stderr,
523 		"DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
524 		bo_gem->gem_handle, bo_gem->name, strerror(errno));
525     }
526     free(bo);
527 }
528 
529 /** Frees all cached buffers significantly older than @time. */
530 static void
531 drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
532 {
533     int i;
534 
535     for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
536 	struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
537 
538 	while (!DRMLISTEMPTY(&bucket->head)) {
539 	    drm_intel_bo_gem *bo_gem;
540 
541 	    bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
542 	    if (time - bo_gem->free_time <= 1)
543 		break;
544 
545 	    DRMLISTDEL(&bo_gem->head);
546 
547 	    drm_intel_gem_bo_free(&bo_gem->bo);
548 	}
549     }
550 }
551 
552 static void
553 drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
554 {
555     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
556     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
557 
558     assert(bo_gem->refcount > 0);
559     if (--bo_gem->refcount == 0) {
560 	struct drm_intel_gem_bo_bucket *bucket;
561 	uint32_t tiling_mode;
562 
563 	if (bo_gem->relocs != NULL) {
564 	    int i;
565 
566 	    /* Unreference all the target buffers */
567 	    for (i = 0; i < bo_gem->reloc_count; i++)
568 		 drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_bo[i]);
569 	    free(bo_gem->reloc_target_bo);
570 	    free(bo_gem->relocs);
571 	}
572 
573 	DBG("bo_unreference final: %d (%s)\n",
574 	    bo_gem->gem_handle, bo_gem->name);
575 
576 	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
577 	/* Put the buffer into our internal cache for reuse if we can. */
578 	tiling_mode = I915_TILING_NONE;
579 	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
580 	    drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0)
581 	{
582 	    struct timespec time;
583 
584 	    clock_gettime(CLOCK_MONOTONIC, &time);
585 	    bo_gem->free_time = time.tv_sec;
586 
587 	    bo_gem->name = NULL;
588 	    bo_gem->validate_index = -1;
589 	    bo_gem->relocs = NULL;
590 	    bo_gem->reloc_target_bo = NULL;
591 	    bo_gem->reloc_count = 0;
592 
593 	    DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
594 
595 	    drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
596 	} else {
597 	    drm_intel_gem_bo_free(bo);
598 	}
599     }
600 }
601 
602 static void
603 drm_intel_gem_bo_unreference(drm_intel_bo *bo)
604 {
605     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
606 
607     pthread_mutex_lock(&bufmgr_gem->lock);
608     drm_intel_gem_bo_unreference_locked(bo);
609     pthread_mutex_unlock(&bufmgr_gem->lock);
610 }
611 
612 static int
613 drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
614 {
615     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
616     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
617     struct drm_i915_gem_set_domain set_domain;
618     int ret;
619 
620     pthread_mutex_lock(&bufmgr_gem->lock);
621 
622     /* Allow recursive mapping. Mesa may recursively map buffers with
623      * nested display loops.
624      */
625     if (!bo_gem->mem_virtual) {
626 	struct drm_i915_gem_mmap mmap_arg;
627 
628 	DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);
629 
630 	memset(&mmap_arg, 0, sizeof(mmap_arg));
631 	mmap_arg.handle = bo_gem->gem_handle;
632 	mmap_arg.offset = 0;
633 	mmap_arg.size = bo->size;
634 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
635 	if (ret != 0) {
636 	    fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n",
637 		    __FILE__, __LINE__,
638 		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
639 	    pthread_mutex_unlock(&bufmgr_gem->lock);
640 	    return ret;
641 	}
642 	bo_gem->mem_virtual = (void *)(uintptr_t)mmap_arg.addr_ptr;
643     }
644     DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
645 	bo_gem->mem_virtual);
646     bo->virtual = bo_gem->mem_virtual;
647 
648     set_domain.handle = bo_gem->gem_handle;
649     set_domain.read_domains = I915_GEM_DOMAIN_CPU;
650     if (write_enable)
651 	set_domain.write_domain = I915_GEM_DOMAIN_CPU;
652     else
653 	set_domain.write_domain = 0;
654     do {
655 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN,
656 		    &set_domain);
657     } while (ret == -1 && errno == EINTR);
658     if (ret != 0) {
659 	fprintf (stderr, "%s:%d: Error setting to CPU domain %d: %s\n",
660 		 __FILE__, __LINE__, bo_gem->gem_handle, strerror (errno));
661 	pthread_mutex_unlock(&bufmgr_gem->lock);
662 	return ret;
663     }
664 
665     pthread_mutex_unlock(&bufmgr_gem->lock);
666 
667     return 0;
668 }
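
/*
 * Usage sketch for a CPU map, assuming the wrappers declared in
 * intel_bufmgr.h (data/size being whatever the caller wants to upload).
 * Mapping moves the object to the CPU domain, so cache coherency is handled
 * by the kernel:
 *
 *     if (drm_intel_bo_map(bo, 1) == 0) {       // 1 = write enable
 *         memcpy(bo->virtual, data, size);
 *         drm_intel_bo_unmap(bo);
 *     }
 *
 * For small, one-shot uploads, drm_intel_bo_subdata() below avoids keeping a
 * long-lived mapping around.
 */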
669 
670 int
671 drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
672 {
673     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
674     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
675     struct drm_i915_gem_set_domain set_domain;
676     int ret;
677 
678     pthread_mutex_lock(&bufmgr_gem->lock);
679 
680     /* Get a mapping of the buffer if we haven't before. */
681     if (bo_gem->gtt_virtual == NULL) {
682 	struct drm_i915_gem_mmap_gtt mmap_arg;
683 
684 	DBG("bo_map_gtt: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);
685 
686 	memset(&mmap_arg, 0, sizeof(mmap_arg));
687 	mmap_arg.handle = bo_gem->gem_handle;
688 
689 	/* Get the fake offset back... */
690 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
691 	if (ret != 0) {
692 	    fprintf(stderr,
693 		    "%s:%d: Error preparing buffer map %d (%s): %s .\n",
694 		    __FILE__, __LINE__,
695 		    bo_gem->gem_handle, bo_gem->name,
696 		    strerror(errno));
697 	    pthread_mutex_unlock(&bufmgr_gem->lock);
698 	    return ret;
699 	}
700 
701 	/* and mmap it */
702 	bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
703 				   MAP_SHARED, bufmgr_gem->fd,
704 				   mmap_arg.offset);
705 	if (bo_gem->gtt_virtual == MAP_FAILED) {
706 	    fprintf(stderr,
707 		    "%s:%d: Error mapping buffer %d (%s): %s .\n",
708 		    __FILE__, __LINE__,
709 		    bo_gem->gem_handle, bo_gem->name,
710 		    strerror(errno));
711 	    pthread_mutex_unlock(&bufmgr_gem->lock);
712 	    return errno;
713 	}
714     }
715 
716     bo->virtual = bo_gem->gtt_virtual;
717 
718     DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
719 	bo_gem->gtt_virtual);
720 
721     /* Now move it to the GTT domain so that the CPU caches are flushed */
722     set_domain.handle = bo_gem->gem_handle;
723     set_domain.read_domains = I915_GEM_DOMAIN_GTT;
724     set_domain.write_domain = I915_GEM_DOMAIN_GTT;
725     do {
726 	    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN,
727 			&set_domain);
728     } while (ret == -1 && errno == EINTR);
729 
730     if (ret != 0) {
731 	    fprintf (stderr, "%s:%d: Error setting domain %d: %s\n",
732 		     __FILE__, __LINE__, bo_gem->gem_handle, strerror (errno));
733     }
734 
735     pthread_mutex_unlock(&bufmgr_gem->lock);
736 
737     return 0;
738 }
739 
740 int
741 drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
742 {
743     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
744     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
745     int ret = 0;
746 
747     if (bo == NULL)
748 	return 0;
749 
750     assert(bo_gem->gtt_virtual != NULL);
751 
752     pthread_mutex_lock(&bufmgr_gem->lock);
753     bo->virtual = NULL;
754     pthread_mutex_unlock(&bufmgr_gem->lock);
755 
756     return ret;
757 }
758 
759 static int
760 drm_intel_gem_bo_unmap(drm_intel_bo *bo)
761 {
762     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
763     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
764     struct drm_i915_gem_sw_finish sw_finish;
765     int ret;
766 
767     if (bo == NULL)
768 	return 0;
769 
770     assert(bo_gem->mem_virtual != NULL);
771 
772     pthread_mutex_lock(&bufmgr_gem->lock);
773 
774     /* Cause a flush to happen if the buffer's pinned for scanout, so the
775      * results show up in a timely manner.
776      */
777     sw_finish.handle = bo_gem->gem_handle;
778     do {
779 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SW_FINISH,
780 		    &sw_finish);
781     } while (ret == -1 && errno == EINTR);
782 
783     bo->virtual = NULL;
784     pthread_mutex_unlock(&bufmgr_gem->lock);
785     return 0;
786 }
787 
788 static int
789 drm_intel_gem_bo_subdata (drm_intel_bo *bo, unsigned long offset,
790 			  unsigned long size, const void *data)
791 {
792     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
793     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
794     struct drm_i915_gem_pwrite pwrite;
795     int ret;
796 
797     memset (&pwrite, 0, sizeof (pwrite));
798     pwrite.handle = bo_gem->gem_handle;
799     pwrite.offset = offset;
800     pwrite.size = size;
801     pwrite.data_ptr = (uint64_t) (uintptr_t) data;
802     do {
803 	ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
804     } while (ret == -1 && errno == EINTR);
805     if (ret != 0) {
806 	fprintf (stderr, "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
807 		 __FILE__, __LINE__,
808 		 bo_gem->gem_handle, (int) offset, (int) size,
809 		 strerror (errno));
810     }
811     return 0;
812 }
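
/*
 * Usage sketch, assuming the drm_intel_bo_subdata() wrapper declared in
 * intel_bufmgr.h.  The write goes through the kernel's pwrite path, so the
 * caller never has to map the buffer:
 *
 *     static const uint32_t zero = 0;
 *     drm_intel_bo_subdata(bo, 0, sizeof(zero), &zero);
 */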
813 
814 static int
815 drm_intel_gem_get_pipe_from_crtc_id (drm_intel_bufmgr *bufmgr, int crtc_id)
816 {
817     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
818     struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
819     int ret;
820 
821     get_pipe_from_crtc_id.crtc_id = crtc_id;
822     ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
823 		 &get_pipe_from_crtc_id);
824     if (ret != 0) {
825 	/* We return -1 here to signal that we don't
826 	 * know which pipe is associated with this crtc.
827 	 * This lets the caller know that this information
828 	 * isn't available; using the wrong pipe for
829 	 * vblank waiting can cause the chipset to lock up
830 	 */
831 	return -1;
832     }
833 
834     return get_pipe_from_crtc_id.pipe;
835 }
836 
837 static int
838 drm_intel_gem_bo_get_subdata (drm_intel_bo *bo, unsigned long offset,
839 			      unsigned long size, void *data)
840 {
841     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
842     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
843     struct drm_i915_gem_pread pread;
844     int ret;
845 
846     memset (&pread, 0, sizeof (pread));
847     pread.handle = bo_gem->gem_handle;
848     pread.offset = offset;
849     pread.size = size;
850     pread.data_ptr = (uint64_t) (uintptr_t) data;
851     do {
852 	ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
853     } while (ret == -1 && errno == EINTR);
854     if (ret != 0) {
855 	fprintf (stderr, "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
856 		 __FILE__, __LINE__,
857 		 bo_gem->gem_handle, (int) offset, (int) size,
858 		 strerror (errno));
859     }
860     return 0;
861 }
862 
863 /** Waits for all GPU rendering to the object to have completed. */
864 static void
865 drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
866 {
867     drm_intel_gem_bo_start_gtt_access(bo, 0);
868 }
869 
870 /**
871  * Sets the object to the GTT read and possibly write domain, used by the X
872  * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
873  *
874  * In combination with drm_intel_gem_bo_pin() and manual fence management, we
875  * can do tiled pixmaps this way.
876  */
877 void
878 drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
879 {
880     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
881     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
882     struct drm_i915_gem_set_domain set_domain;
883     int ret;
884 
885     set_domain.handle = bo_gem->gem_handle;
886     set_domain.read_domains = I915_GEM_DOMAIN_GTT;
887     set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
888     do {
889 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
890     } while (ret == -1 && errno == EINTR);
891     if (ret != 0) {
892 	fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
893 		 __FILE__, __LINE__,
894 		 bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain,
895 		 strerror (errno));
896     }
897 }
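
/*
 * Usage sketch for the scanout path described above, assuming the wrappers
 * declared in intel_bufmgr.h (front_bo being whatever buffer the caller scans
 * out from): pin the buffer, then move it to the GTT domain before the 2D
 * driver writes through its own mapping:
 *
 *     drm_intel_bo_pin(front_bo, 4096);
 *     drm_intel_gem_bo_start_gtt_access(front_bo, 1);   // 1 = write
 */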
898 
899 static void
900 drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
901 {
902     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
903     int i;
904 
905     free(bufmgr_gem->exec_objects);
906     free(bufmgr_gem->exec_bos);
907 
908     pthread_mutex_destroy(&bufmgr_gem->lock);
909 
910     /* Free any cached buffer objects we were going to reuse */
911     for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
912 	struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
913 	drm_intel_bo_gem *bo_gem;
914 
915 	while (!DRMLISTEMPTY(&bucket->head)) {
916 	    bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
917 	    DRMLISTDEL(&bo_gem->head);
918 
919 	    drm_intel_gem_bo_free(&bo_gem->bo);
920 	}
921     }
922 
923     free(bufmgr);
924 }
925 
926 /**
927  * Adds the target buffer to the validation list and adds the relocation
928  * to the reloc_buffer's relocation list.
929  *
930  * The relocation entry at the given offset must already contain the
931  * precomputed relocation value, because the kernel will optimize out
932  * the relocation entry write when the buffer hasn't moved from the
933  * last known offset in target_bo.
934  */
935 static int
936 drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
937 			    drm_intel_bo *target_bo, uint32_t target_offset,
938 			    uint32_t read_domains, uint32_t write_domain)
939 {
940     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
941     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
942     drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
943 
944     pthread_mutex_lock(&bufmgr_gem->lock);
945 
946     /* Create a new relocation list if needed */
947     if (bo_gem->relocs == NULL)
948 	drm_intel_setup_reloc_list(bo);
949 
950     /* Check overflow */
951     assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
952 
953     /* Check args */
954     assert (offset <= bo->size - 4);
955     assert ((write_domain & (write_domain-1)) == 0);
956 
957     /* Make sure that we're not adding a reloc to something whose size has
958      * already been accounted for.
959      */
960     assert(!bo_gem->used_as_reloc_target);
961     bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
962     bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
963 
964     /* Flag the target to disallow further relocations in it. */
965     target_bo_gem->used_as_reloc_target = 1;
966 
967     bo_gem->relocs[bo_gem->reloc_count].offset = offset;
968     bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
969     bo_gem->relocs[bo_gem->reloc_count].target_handle =
970 	target_bo_gem->gem_handle;
971     bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
972     bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
973     bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
974 
975     bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo;
976     drm_intel_gem_bo_reference_locked(target_bo);
977 
978     bo_gem->reloc_count++;
979 
980     pthread_mutex_unlock(&bufmgr_gem->lock);
981 
982     return 0;
983 }
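
/*
 * Usage sketch, assuming the drm_intel_bo_emit_reloc() wrapper declared in
 * intel_bufmgr.h (batch, i and target_delta being the caller's batch dwords,
 * dword index and offset into the target).  The caller writes the presumed
 * address into the batch and records a relocation at the same offset:
 *
 *     batch[i] = target_bo->offset + target_delta;      // presumed value
 *     drm_intel_bo_emit_reloc(batch_bo, i * 4,
 *                             target_bo, target_delta,
 *                             I915_GEM_DOMAIN_RENDER, 0);
 *
 * The kernel rewrites the dword only if target_bo has moved since
 * target_bo->offset was sampled.
 */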
984 
985 /**
986  * Walk the tree of relocations rooted at BO and accumulate the list of
987  * validations to be performed and update the relocation buffers with
988  * index values into the validation list.
989  */
990 static void
991 drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
992 {
993     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
994     int i;
995 
996     if (bo_gem->relocs == NULL)
997 	return;
998 
999     for (i = 0; i < bo_gem->reloc_count; i++) {
1000 	drm_intel_bo *target_bo = bo_gem->reloc_target_bo[i];
1001 
1002 	/* Continue walking the tree depth-first. */
1003 	drm_intel_gem_bo_process_reloc(target_bo);
1004 
1005 	/* Add the target to the validate list */
1006 	drm_intel_add_validate_buffer(target_bo);
1007     }
1008 }
1009 
1010 static void
1011 drm_intel_update_buffer_offsets (drm_intel_bufmgr_gem *bufmgr_gem)
1012 {
1013     int i;
1014 
1015     for (i = 0; i < bufmgr_gem->exec_count; i++) {
1016 	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1017 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1018 
1019 	/* Update the buffer offset */
1020 	if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
1021 	    DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
1022 		bo_gem->gem_handle, bo_gem->name, bo->offset,
1023 		(unsigned long long)bufmgr_gem->exec_objects[i].offset);
1024 	    bo->offset = bufmgr_gem->exec_objects[i].offset;
1025 	}
1026     }
1027 }
1028 
1029 static int
1030 drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
1031 		      drm_clip_rect_t *cliprects, int num_cliprects,
1032 		      int DR4)
1033 {
1034     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1035     struct drm_i915_gem_execbuffer execbuf;
1036     int ret, i;
1037 
1038     pthread_mutex_lock(&bufmgr_gem->lock);
1039     /* Update indices and set up the validate list. */
1040     drm_intel_gem_bo_process_reloc(bo);
1041 
1042     /* Add the batch buffer to the validation list.  There are no relocations
1043      * pointing to it.
1044      */
1045     drm_intel_add_validate_buffer(bo);
1046 
1047     execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec_objects;
1048     execbuf.buffer_count = bufmgr_gem->exec_count;
1049     execbuf.batch_start_offset = 0;
1050     execbuf.batch_len = used;
1051     execbuf.cliprects_ptr = (uintptr_t)cliprects;
1052     execbuf.num_cliprects = num_cliprects;
1053     execbuf.DR1 = 0;
1054     execbuf.DR4 = DR4;
1055 
1056     do {
1057 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER, &execbuf);
1058     } while (ret != 0 && errno == EAGAIN);
1059 
1060     if (ret != 0 && errno == ENOMEM) {
1061 	fprintf(stderr, "Execbuffer fails to pin. Estimate: %u. Actual: %u. Available: %u\n",
1062 		drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
1063 						   bufmgr_gem->exec_count),
1064 		drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
1065 						  bufmgr_gem->exec_count),
1066 		(unsigned int) bufmgr_gem->gtt_size);
1067     }
1068     drm_intel_update_buffer_offsets (bufmgr_gem);
1069 
1070     if (bufmgr_gem->bufmgr.debug)
1071 	drm_intel_gem_dump_validation_list(bufmgr_gem);
1072 
1073     for (i = 0; i < bufmgr_gem->exec_count; i++) {
1074 	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1075 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1076 
1077 	/* Disconnect the buffer from the validate list */
1078 	bo_gem->validate_index = -1;
1079 	drm_intel_gem_bo_unreference_locked(bo);
1080 	bufmgr_gem->exec_bos[i] = NULL;
1081     }
1082     bufmgr_gem->exec_count = 0;
1083     pthread_mutex_unlock(&bufmgr_gem->lock);
1084 
1085     return 0;
1086 }
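
/*
 * Usage sketch of a submission, assuming the wrappers declared in
 * intel_bufmgr.h (batch/batch_bo being the caller's command buffer and
 * "used" the number of bytes actually filled in).  All relocations must have
 * been emitted before exec:
 *
 *     drm_intel_bo_subdata(batch_bo, 0, used, batch);   // upload commands
 *     drm_intel_bo_exec(batch_bo, used, NULL, 0, 0);    // no cliprects, DR4 = 0
 */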
1087 
1088 static int
1089 drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
1090 {
1091     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1092     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1093     struct drm_i915_gem_pin pin;
1094     int ret;
1095 
1096     memset(&pin, 0, sizeof(pin));
1097     pin.handle = bo_gem->gem_handle;
1098     pin.alignment = alignment;
1099 
1100     do {
1101 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PIN, &pin);
1102     } while (ret == -1 && errno == EINTR);
1103 
1104     if (ret != 0)
1105 	return -errno;
1106 
1107     bo->offset = pin.offset;
1108     return 0;
1109 }
1110 
1111 static int
1112 drm_intel_gem_bo_unpin(drm_intel_bo *bo)
1113 {
1114     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1115     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1116     struct drm_i915_gem_unpin unpin;
1117     int ret;
1118 
1119     memset(&unpin, 0, sizeof(unpin));
1120     unpin.handle = bo_gem->gem_handle;
1121 
1122     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
1123     if (ret != 0)
1124 	return -errno;
1125 
1126     return 0;
1127 }
1128 
1129 static int
1130 drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
1131 			    uint32_t stride)
1132 {
1133     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1134     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1135     struct drm_i915_gem_set_tiling set_tiling;
1136     int ret;
1137 
1138     if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
1139 	return 0;
1140 
1141     /* If we're going from non-tiling to tiling, bump fence count */
1142     if (bo_gem->tiling_mode == I915_TILING_NONE)
1143 	bo_gem->reloc_tree_fences++;
1144 
1145     memset(&set_tiling, 0, sizeof(set_tiling));
1146     set_tiling.handle = bo_gem->gem_handle;
1147     set_tiling.tiling_mode = *tiling_mode;
1148     set_tiling.stride = stride;
1149 
1150     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
1151     if (ret != 0) {
1152 	*tiling_mode = bo_gem->tiling_mode;
1153 	return -errno;
1154     }
1155     bo_gem->tiling_mode = set_tiling.tiling_mode;
1156     bo_gem->swizzle_mode = set_tiling.swizzle_mode;
1157 
1158     /* If we're going from tiling to non-tiling, drop fence count */
1159     if (bo_gem->tiling_mode == I915_TILING_NONE)
1160 	bo_gem->reloc_tree_fences--;
1161 
1162     *tiling_mode = bo_gem->tiling_mode;
1163     return 0;
1164 }
1165 
1166 static int
1167 drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
1168 			    uint32_t *swizzle_mode)
1169 {
1170     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1171 
1172     *tiling_mode = bo_gem->tiling_mode;
1173     *swizzle_mode = bo_gem->swizzle_mode;
1174     return 0;
1175 }
1176 
1177 static int
1178 drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t *name)
1179 {
1180     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1181     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1182     struct drm_gem_flink flink;
1183     int ret;
1184 
1185     if (!bo_gem->global_name) {
1186 	memset(&flink, 0, sizeof(flink));
1187 	flink.handle = bo_gem->gem_handle;
1188 
1189 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
1190 	if (ret != 0)
1191 	    return -errno;
1192 	bo_gem->global_name = flink.name;
1193 	bo_gem->reusable = 0;
1194     }
1195 
1196     *name = bo_gem->global_name;
1197     return 0;
1198 }
1199 
1200 /**
1201  * Enables unlimited caching of buffer objects for reuse.
1202  *
1203  * This is potentially very memory expensive, as the cache at each bucket
1204  * size is only bounded by how many buffers of that size we've managed to have
1205  * in flight at once.
1206  */
1207 void
1208 drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
1209 {
1210     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
1211 
1212     bufmgr_gem->bo_reuse = 1;
1213 }
1214 
1215 /**
1216  * Return the additional aperture space required by the tree of buffer objects
1217  * rooted at bo.
1218  */
1219 static int
1220 drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
1221 {
1222     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1223     int i;
1224     int total = 0;
1225 
1226     if (bo == NULL || bo_gem->included_in_check_aperture)
1227 	return 0;
1228 
1229     total += bo->size;
1230     bo_gem->included_in_check_aperture = 1;
1231 
1232     for (i = 0; i < bo_gem->reloc_count; i++)
1233 	total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_bo[i]);
1234 
1235     return total;
1236 }
1237 
1238 /**
1239  * Count the number of buffers in this list that need a fence reg
1240  *
1241  * If the count is greater than the number of available regs, we'll have
1242  * to ask the caller to resubmit a batch with fewer tiled buffers.
1243  *
1244  * This function over-counts if the same buffer is used multiple times.
1245  */
1246 static unsigned int
1247 drm_intel_gem_total_fences(drm_intel_bo **bo_array, int count)
1248 {
1249     int i;
1250     unsigned int total = 0;
1251 
1252     for (i = 0; i < count; i++) {
1253 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
1254 
1255 	if (bo_gem == NULL)
1256 	    continue;
1257 
1258 	total += bo_gem->reloc_tree_fences;
1259     }
1260     return total;
1261 }
1262 
1263 /**
1264  * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
1265  * for the next drm_intel_bufmgr_check_aperture_space() call.
1266  */
1267 static void
1268 drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
1269 {
1270     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1271     int i;
1272 
1273     if (bo == NULL || !bo_gem->included_in_check_aperture)
1274 	return;
1275 
1276     bo_gem->included_in_check_aperture = 0;
1277 
1278     for (i = 0; i < bo_gem->reloc_count; i++)
1279 	drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_bo[i]);
1280 }
1281 
1282 /**
1283  * Return a conservative estimate for the amount of aperture required
1284  * for a collection of buffers. This may double-count some buffers.
1285  */
1286 static unsigned int
1287 drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
1288 {
1289     int i;
1290     unsigned int total = 0;
1291 
1292     for (i = 0; i < count; i++) {
1293 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
1294 	if (bo_gem != NULL)
1295 		total += bo_gem->reloc_tree_size;
1296     }
1297     return total;
1298 }
1299 
1300 /**
1301  * Return the amount of aperture needed for a collection of buffers.
1302  * This avoids double counting any buffers, at the cost of looking
1303  * at every buffer in the set.
1304  */
1305 static unsigned int
1306 drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
1307 {
1308     int i;
1309     unsigned int total = 0;
1310 
1311     for (i = 0; i < count; i++) {
1312 	total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
1313 	/* For the first buffer object in the array, we get an accurate count
1314 	 * back for its reloc_tree size (since nothing had been flagged as
1315 	 * being counted yet).  We can save that value out as a more
1316 	 * conservative reloc_tree_size that avoids double-counting target
1317 	 * buffers.  Since the first buffer happens to usually be the batch
1318 	 * buffer in our callers, this can pull us back from doing the tree
1319 	 * walk on every new batch emit.
1320 	 */
1321 	if (i == 0) {
1322 	    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
1323 	    bo_gem->reloc_tree_size = total;
1324 	}
1325     }
1326 
1327     for (i = 0; i < count; i++)
1328 	drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
1329     return total;
1330 }
1331 
1332 /**
1333  * Return -1 if the batchbuffer should be flushed before attempting to
1334  * emit rendering referencing the buffers pointed to by bo_array.
1335  *
1336  * This is required because if we try to emit a batchbuffer with relocations
1337  * to a tree of buffers that won't simultaneously fit in the aperture,
1338  * the rendering will return an error at a point where the software is not
1339  * prepared to recover from it.
1340  *
1341  * However, we also want to emit the batchbuffer significantly before we reach
1342  * the limit, as a series of batchbuffers each of which references buffers
1343  * covering almost all of the aperture means that at each emit we end up
1344  * waiting to evict a buffer from the last rendering, and we get synchronous
1345  * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
1346  * get better parallelism.
1347  */
1348 static int
1349 drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
1350 {
1351     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo_array[0]->bufmgr;
1352     unsigned int total = 0;
1353     unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
1354     int total_fences;
1355 
1356     /* Check for fence reg constraints if necessary */
1357     if (bufmgr_gem->available_fences) {
1358 	total_fences = drm_intel_gem_total_fences(bo_array, count);
1359 	if (total_fences > bufmgr_gem->available_fences)
1360 	    return -1;
1361     }
1362 
1363     total = drm_intel_gem_estimate_batch_space(bo_array, count);
1364 
1365     if (total > threshold)
1366 	total = drm_intel_gem_compute_batch_space(bo_array, count);
1367 
1368     if (total > threshold) {
1369 	DBG("check_space: overflowed available aperture, %dkb vs %dkb\n",
1370 	    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
1371 	return -1;
1372     } else {
1373 	DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024 ,
1374 	    (int)bufmgr_gem->gtt_size / 1024);
1375 	return 0;
1376     }
1377 }
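
/*
 * Usage sketch of the caller pattern this is meant for, assuming the
 * drm_intel_bufmgr_check_aperture_space() wrapper declared in intel_bufmgr.h
 * (texture_bo and flush_batch() standing in for the caller's buffers and
 * batch-flush routine): submit the current batch first whenever the next set
 * of buffers might not fit.
 *
 *     drm_intel_bo *bos[2] = { batch_bo, texture_bo };
 *     if (drm_intel_bufmgr_check_aperture_space(bos, 2) != 0)
 *         flush_batch();   // submit what we have, then retry
 */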
1378 
1379 /*
1380  * Disable buffer reuse for objects which are shared with the kernel
1381  * as scanout buffers
1382  */
1383 static int
1384 drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
1385 {
1386     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1387 
1388     bo_gem->reusable = 0;
1389     return 0;
1390 }
1391 
1392 /**
1393  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
1394  * and manage buffer objects.
1395  *
1396  * \param fd File descriptor of the opened DRM device.
1397  */
1398 drm_intel_bufmgr *
1399 drm_intel_bufmgr_gem_init(int fd, int batch_size)
1400 {
1401     drm_intel_bufmgr_gem *bufmgr_gem;
1402     struct drm_i915_gem_get_aperture aperture;
1403     drm_i915_getparam_t gp;
1404     int ret, i;
1405     unsigned long size;
1406 
1407     bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
1408     bufmgr_gem->fd = fd;
1409 
1410     if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
1411       free(bufmgr_gem);
1412       return NULL;
1413    }
1414 
1415     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
1416 
1417     if (ret == 0)
1418 	bufmgr_gem->gtt_size = aperture.aper_available_size;
1419     else {
1420 	fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
1421 		strerror(errno));
1422 	bufmgr_gem->gtt_size = 128 * 1024 * 1024;
1423 	fprintf(stderr, "Assuming %dkB available aperture size.\n"
1424 		"May lead to reduced performance or incorrect rendering.\n",
1425 		(int)bufmgr_gem->gtt_size / 1024);
1426     }
1427 
1428     gp.param = I915_PARAM_CHIPSET_ID;
1429     gp.value = &bufmgr_gem->pci_device;
1430     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
1431     if (ret) {
1432 	fprintf(stderr, "get chip id failed: %d\n", ret);
1433 	fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
1434     }
1435 
1436     if (!IS_I965G(bufmgr_gem)) {
1437 	gp.param = I915_PARAM_NUM_FENCES_AVAIL;
1438 	gp.value = &bufmgr_gem->available_fences;
1439 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
1440 	if (ret) {
1441 	    fprintf(stderr, "get fences failed: %d\n", ret);
1442 	    fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
1443 	    bufmgr_gem->available_fences = 0;
1444 	}
1445     }
1446 
1447     /* Let's go with one relocation per every 2 dwords (but round down a bit
1448      * since a power of two will mean an extra page allocation for the reloc
1449      * buffer).
1450      *
1451      * Every 4 was too few for the blender benchmark.
1452      */
1453     bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
1454 
1455     bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
1456     bufmgr_gem->bufmgr.bo_alloc_for_render = drm_intel_gem_bo_alloc_for_render;
1457     bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
1458     bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
1459     bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
1460     bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
1461     bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
1462     bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
1463     bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
1464     bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
1465     bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
1466     bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
1467     bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
1468     bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
1469     bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
1470     bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
1471     bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
1472     bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
1473     bufmgr_gem->bufmgr.debug = 0;
1474     bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
1475     bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
1476     bufmgr_gem->bufmgr.get_pipe_from_crtc_id = drm_intel_gem_get_pipe_from_crtc_id;
1477     /* Initialize the linked lists for BO reuse cache. */
1478     for (i = 0, size = 4096; i < DRM_INTEL_GEM_BO_BUCKETS; i++, size *= 2) {
1479 	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
1480 	bufmgr_gem->cache_bucket[i].size = size;
1481     }
1482 
1483     return &bufmgr_gem->bufmgr;
1484 }
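
/*
 * Typical lifetime of the buffer manager, assuming the wrappers declared in
 * intel_bufmgr.h and an already-opened DRM file descriptor (drmOpen() shown
 * here only as one way to get it):
 *
 *     int fd = drmOpen("i915", NULL);
 *     drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *     drm_intel_bufmgr_gem_enable_reuse(bufmgr);          // optional BO cache
 *
 *     drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 4096);
 *     ...
 *     drm_intel_bo_unreference(bo);
 *     drm_intel_bufmgr_destroy(bufmgr);
 */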
1485 
1486