1 /*
2  * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 #include "radeon_drm_cs.h"
28 
29 #include "util/u_hash_table.h"
30 #include "util/u_memory.h"
31 #include "util/simple_list.h"
32 #include "os/os_thread.h"
33 #include "os/os_mman.h"
34 #include "util/os_time.h"
35 
36 #include "frontend/drm_driver.h"
37 
38 #include <sys/ioctl.h>
39 #include <xf86drm.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <inttypes.h>
44 
/* Forward declaration of the winsys buffer-creation entry point.
 * radeon_bo_slab_alloc() calls it to allocate a slab's backing buffer
 * before the definition has been seen by the compiler.
 */
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);
51 
/* Reinterpret a generic pb_buffer pointer as the radeon winsys buffer
 * type. This is a plain pointer cast; no checking is performed.
 */
static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
   struct radeon_bo *as_radeon = (struct radeon_bo *)bo;

   return as_radeon;
}
56 
/* One free range ("hole") of GPU virtual address space. Holes are linked
 * into the holes list of a radeon_vm_heap and are created, split, merged
 * and deleted by radeon_bomgr_find_va() and radeon_bomgr_free_va().
 */
struct radeon_bo_va_hole {
   struct list_head list;     /* link in the owning heap's holes list */
   uint64_t         offset;   /* first address covered by the hole */
   uint64_t         size;     /* length of the hole, same units as offset */
};
62 
radeon_real_bo_is_busy(struct radeon_bo * bo)63 static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
64 {
65    struct drm_radeon_gem_busy args = {0};
66 
67    args.handle = bo->handle;
68    return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
69                               &args, sizeof(args)) != 0;
70 }
71 
radeon_bo_is_busy(struct radeon_bo * bo)72 static bool radeon_bo_is_busy(struct radeon_bo *bo)
73 {
74    unsigned num_idle;
75    bool busy = false;
76 
77    if (bo->handle)
78       return radeon_real_bo_is_busy(bo);
79 
80    mtx_lock(&bo->rws->bo_fence_lock);
81    for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
82       if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
83          busy = true;
84          break;
85       }
86       radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
87    }
88    memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
89          (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
90    bo->u.slab.num_fences -= num_idle;
91    mtx_unlock(&bo->rws->bo_fence_lock);
92 
93    return busy;
94 }
95 
radeon_real_bo_wait_idle(struct radeon_bo * bo)96 static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
97 {
98    struct drm_radeon_gem_wait_idle args = {0};
99 
100    args.handle = bo->handle;
101    while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
102                           &args, sizeof(args)) == -EBUSY);
103 }
104 
radeon_bo_wait_idle(struct radeon_bo * bo)105 static void radeon_bo_wait_idle(struct radeon_bo *bo)
106 {
107    if (bo->handle) {
108       radeon_real_bo_wait_idle(bo);
109    } else {
110       mtx_lock(&bo->rws->bo_fence_lock);
111       while (bo->u.slab.num_fences) {
112          struct radeon_bo *fence = NULL;
113          radeon_bo_reference(&fence, bo->u.slab.fences[0]);
114          mtx_unlock(&bo->rws->bo_fence_lock);
115 
116          /* Wait without holding the fence lock. */
117          radeon_real_bo_wait_idle(fence);
118 
119          mtx_lock(&bo->rws->bo_fence_lock);
120          if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
121             radeon_bo_reference(&bo->u.slab.fences[0], NULL);
122             memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
123                   (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
124             bo->u.slab.num_fences--;
125          }
126          radeon_bo_reference(&fence, NULL);
127       }
128       mtx_unlock(&bo->rws->bo_fence_lock);
129    }
130 }
131 
radeon_bo_wait(struct pb_buffer * _buf,uint64_t timeout,enum radeon_bo_usage usage)132 static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
133                            enum radeon_bo_usage usage)
134 {
135    struct radeon_bo *bo = radeon_bo(_buf);
136    int64_t abs_timeout;
137 
138    /* No timeout. Just query. */
139    if (timeout == 0)
140       return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
141 
142    abs_timeout = os_time_get_absolute_timeout(timeout);
143 
144    /* Wait if any ioctl is being submitted with this buffer. */
145    if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
146       return false;
147 
148    /* Infinite timeout. */
149    if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
150       radeon_bo_wait_idle(bo);
151       return true;
152    }
153 
154    /* Other timeouts need to be emulated with a loop. */
155    while (radeon_bo_is_busy(bo)) {
156       if (os_time_get_nano() >= abs_timeout)
157          return false;
158       os_time_sleep(10);
159    }
160 
161    return true;
162 }
163 
get_valid_domain(enum radeon_bo_domain domain)164 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
165 {
166    /* Zero domains the driver doesn't understand. */
167    domain &= RADEON_DOMAIN_VRAM_GTT;
168 
169    /* If no domain is set, we must set something... */
170    if (!domain)
171       domain = RADEON_DOMAIN_VRAM_GTT;
172 
173    return domain;
174 }
175 
radeon_bo_get_initial_domain(struct pb_buffer * buf)176 static enum radeon_bo_domain radeon_bo_get_initial_domain(
177       struct pb_buffer *buf)
178 {
179    struct radeon_bo *bo = (struct radeon_bo*)buf;
180    struct drm_radeon_gem_op args;
181 
182    if (bo->rws->info.drm_minor < 38)
183       return RADEON_DOMAIN_VRAM_GTT;
184 
185    memset(&args, 0, sizeof(args));
186    args.handle = bo->handle;
187    args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
188 
189    if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
190                            &args, sizeof(args))) {
191       fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
192               bo, bo->handle);
193       /* Default domain as returned by get_valid_domain. */
194       return RADEON_DOMAIN_VRAM_GTT;
195    }
196 
197    /* GEM domains and winsys domains are defined the same. */
198    return get_valid_domain(args.value);
199 }
200 
radeon_bomgr_find_va(const struct radeon_info * info,struct radeon_vm_heap * heap,uint64_t size,uint64_t alignment)201 static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
202                                      struct radeon_vm_heap *heap,
203                                      uint64_t size, uint64_t alignment)
204 {
205    struct radeon_bo_va_hole *hole, *n;
206    uint64_t offset = 0, waste = 0;
207 
208    /* All VM address space holes will implicitly start aligned to the
209     * size alignment, so we don't need to sanitize the alignment here
210     */
211    size = align(size, info->gart_page_size);
212 
213    mtx_lock(&heap->mutex);
214    /* first look for a hole */
215    LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
216       offset = hole->offset;
217       waste = offset % alignment;
218       waste = waste ? alignment - waste : 0;
219       offset += waste;
220       if (offset >= (hole->offset + hole->size)) {
221          continue;
222       }
223       if (!waste && hole->size == size) {
224          offset = hole->offset;
225          list_del(&hole->list);
226          FREE(hole);
227          mtx_unlock(&heap->mutex);
228          return offset;
229       }
230       if ((hole->size - waste) > size) {
231          if (waste) {
232             n = CALLOC_STRUCT(radeon_bo_va_hole);
233             n->size = waste;
234             n->offset = hole->offset;
235             list_add(&n->list, &hole->list);
236          }
237          hole->size -= (size + waste);
238          hole->offset += size + waste;
239          mtx_unlock(&heap->mutex);
240          return offset;
241       }
242       if ((hole->size - waste) == size) {
243          hole->size = waste;
244          mtx_unlock(&heap->mutex);
245          return offset;
246       }
247    }
248 
249    offset = heap->start;
250    waste = offset % alignment;
251    waste = waste ? alignment - waste : 0;
252 
253    if (offset + waste + size > heap->end) {
254       mtx_unlock(&heap->mutex);
255       return 0;
256    }
257 
258    if (waste) {
259       n = CALLOC_STRUCT(radeon_bo_va_hole);
260       n->size = waste;
261       n->offset = offset;
262       list_add(&n->list, &heap->holes);
263    }
264    offset += waste;
265    heap->start += size + waste;
266    mtx_unlock(&heap->mutex);
267    return offset;
268 }
269 
radeon_bomgr_find_va64(struct radeon_drm_winsys * ws,uint64_t size,uint64_t alignment)270 static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
271                                        uint64_t size, uint64_t alignment)
272 {
273    uint64_t va = 0;
274 
275    /* Try to allocate from the 64-bit address space first.
276     * If it doesn't exist (start = 0) or if it doesn't have enough space,
277     * fall back to the 32-bit address space.
278     */
279    if (ws->vm64.start)
280       va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
281    if (!va)
282       va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
283    return va;
284 }
285 
/* Return a range of GPU virtual address space to a VM heap.
 *
 * size is rounded up to info->gart_page_size, mirroring the rounding done
 * at allocation time in radeon_bomgr_find_va(). heap->mutex is held while
 * the heap is updated.
 *
 * If the range ends exactly at heap->start, the heap top is simply lowered
 * (and lowered further if the first hole in the list ends where the range
 * begins). Otherwise the range is merged into an adjacent hole where
 * possible, or recorded as a new hole.
 *
 * NOTE(review): the search below stops at the first hole whose offset is
 * below va, so the holes list appears to be kept in descending offset
 * order - confirm against the other users of heap->holes.
 */
static void radeon_bomgr_free_va(const struct radeon_info *info,
                                 struct radeon_vm_heap *heap,
                                 uint64_t va, uint64_t size)
{
   struct radeon_bo_va_hole *hole = NULL;

   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   if ((va + size) == heap->start) {
      /* The freed range is the topmost allocation: shrink the used area. */
      heap->start = va;
      /* Delete uppermost hole if it reaches the new top */
      if (!list_is_empty(&heap->holes)) {
         hole = container_of(heap->holes.next, hole, list);
         if ((hole->offset + hole->size) == va) {
            heap->start = hole->offset;
            list_del(&hole->list);
            FREE(hole);
         }
      }
   } else {
      struct radeon_bo_va_hole *next;

      /* Start with "hole" aliasing the list head itself; it only becomes a
       * real entry once the loop below has stepped past at least one hole.
       */
      hole = container_of(&heap->holes, hole, list);
      /* After the loop, "hole" is the last entry with offset >= va (or the
       * list head) and "next" is the first entry with offset < va (or the
       * list head if the loop ran to completion).
       */
      LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
         if (next->offset < va)
            break;
         hole = next;
      }

      if (&hole->list != &heap->holes) {
         /* Grow upper hole if it's adjacent */
         if (hole->offset == (va + size)) {
            hole->offset = va;
            hole->size += size;
            /* Merge lower hole if it's adjacent */
            if (next != hole && &next->list != &heap->holes &&
                (next->offset + next->size) == va) {
               /* hole->size already includes the freed range here. */
               next->size += hole->size;
               list_del(&hole->list);
               FREE(hole);
            }
            goto out;
         }
      }

      /* Grow lower hole if it's adjacent */
      if (next != hole && &next->list != &heap->holes &&
          (next->offset + next->size) == va) {
         next->size += size;
         goto out;
      }

      /* FIXME on allocation failure we just lose virtual address space
       * maybe print a warning
       */
      next = CALLOC_STRUCT(radeon_bo_va_hole);
      if (next) {
         next->size = size;
         next->offset = va;
         /* Insert right after "hole", i.e. between the two neighbours. */
         list_add(&next->list, &hole->list);
      }
   }
out:
   mtx_unlock(&heap->mutex);
}
352 
radeon_bo_destroy(struct pb_buffer * _buf)353 void radeon_bo_destroy(struct pb_buffer *_buf)
354 {
355    struct radeon_bo *bo = radeon_bo(_buf);
356    struct radeon_drm_winsys *rws = bo->rws;
357    struct drm_gem_close args;
358 
359    assert(bo->handle && "must not be called for slab entries");
360 
361    memset(&args, 0, sizeof(args));
362 
363    mtx_lock(&rws->bo_handles_mutex);
364    _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
365    if (bo->flink_name) {
366       _mesa_hash_table_remove_key(rws->bo_names,
367                                   (void*)(uintptr_t)bo->flink_name);
368    }
369    mtx_unlock(&rws->bo_handles_mutex);
370 
371    if (bo->u.real.ptr)
372       os_munmap(bo->u.real.ptr, bo->base.size);
373 
374    if (rws->info.r600_has_virtual_memory) {
375       if (rws->va_unmap_working) {
376          struct drm_radeon_gem_va va;
377 
378          va.handle = bo->handle;
379          va.vm_id = 0;
380          va.operation = RADEON_VA_UNMAP;
381          va.flags = RADEON_VM_PAGE_READABLE |
382                     RADEON_VM_PAGE_WRITEABLE |
383                     RADEON_VM_PAGE_SNOOPED;
384          va.offset = bo->va;
385 
386          if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
387                                  sizeof(va)) != 0 &&
388              va.operation == RADEON_VA_RESULT_ERROR) {
389             fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
390             fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
391             fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
392          }
393       }
394 
395       radeon_bomgr_free_va(&rws->info,
396                            bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
397                            bo->va, bo->base.size);
398    }
399 
400    /* Close object. */
401    args.handle = bo->handle;
402    drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
403 
404    mtx_destroy(&bo->u.real.map_mutex);
405 
406    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
407       rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
408    else if (bo->initial_domain & RADEON_DOMAIN_GTT)
409       rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);
410 
411    if (bo->u.real.map_count >= 1) {
412       if (bo->initial_domain & RADEON_DOMAIN_VRAM)
413          bo->rws->mapped_vram -= bo->base.size;
414       else
415          bo->rws->mapped_gtt -= bo->base.size;
416       bo->rws->num_mapped_buffers--;
417    }
418 
419    FREE(bo);
420 }
421 
radeon_bo_destroy_or_cache(struct pb_buffer * _buf)422 static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf)
423 {
424    struct radeon_bo *bo = radeon_bo(_buf);
425 
426    assert(bo->handle && "must not be called for slab entries");
427 
428    if (bo->u.real.use_reusable_pool)
429       pb_cache_add_buffer(&bo->u.real.cache_entry);
430    else
431       radeon_bo_destroy(_buf);
432 }
433 
radeon_bo_do_map(struct radeon_bo * bo)434 void *radeon_bo_do_map(struct radeon_bo *bo)
435 {
436    struct drm_radeon_gem_mmap args = {0};
437    void *ptr;
438    unsigned offset;
439 
440    /* If the buffer is created from user memory, return the user pointer. */
441    if (bo->user_ptr)
442       return bo->user_ptr;
443 
444    if (bo->handle) {
445       offset = 0;
446    } else {
447       offset = bo->va - bo->u.slab.real->va;
448       bo = bo->u.slab.real;
449    }
450 
451    /* Map the buffer. */
452    mtx_lock(&bo->u.real.map_mutex);
453    /* Return the pointer if it's already mapped. */
454    if (bo->u.real.ptr) {
455       bo->u.real.map_count++;
456       mtx_unlock(&bo->u.real.map_mutex);
457       return (uint8_t*)bo->u.real.ptr + offset;
458    }
459    args.handle = bo->handle;
460    args.offset = 0;
461    args.size = (uint64_t)bo->base.size;
462    if (drmCommandWriteRead(bo->rws->fd,
463                            DRM_RADEON_GEM_MMAP,
464                            &args,
465                            sizeof(args))) {
466       mtx_unlock(&bo->u.real.map_mutex);
467       fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
468               bo, bo->handle);
469       return NULL;
470    }
471 
472    ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
473                  bo->rws->fd, args.addr_ptr);
474    if (ptr == MAP_FAILED) {
475       /* Clear the cache and try again. */
476       pb_cache_release_all_buffers(&bo->rws->bo_cache);
477 
478       ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
479                     bo->rws->fd, args.addr_ptr);
480       if (ptr == MAP_FAILED) {
481          mtx_unlock(&bo->u.real.map_mutex);
482          fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
483          return NULL;
484       }
485    }
486    bo->u.real.ptr = ptr;
487    bo->u.real.map_count = 1;
488 
489    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
490       bo->rws->mapped_vram += bo->base.size;
491    else
492       bo->rws->mapped_gtt += bo->base.size;
493    bo->rws->num_mapped_buffers++;
494 
495    mtx_unlock(&bo->u.real.map_mutex);
496    return (uint8_t*)bo->u.real.ptr + offset;
497 }
498 
/* Map a buffer for CPU access, synchronizing with the GPU as requested.
 *
 * buf   - buffer to map.
 * rcs   - command stream that may reference the buffer; may be NULL, in
 *         which case no flush is attempted.
 * usage - PIPE_MAP_* flags:
 *         - PIPE_MAP_UNSYNCHRONIZED skips all flushing and waiting.
 *         - PIPE_MAP_DONTBLOCK never waits: if the command stream still
 *           references the buffer an asynchronous flush is started and NULL
 *           is returned; NULL is also returned if the buffer is busy.
 *         - otherwise the command stream is flushed if it references the
 *           buffer, the call blocks until the buffer is idle, and the time
 *           spent is added to rws->buffer_wait_time.
 *         - without PIPE_MAP_WRITE only command-stream references that
 *           write the buffer trigger a flush.
 *
 * Note that radeon_bo_wait() in this file does not consult its usage
 * argument, so the RADEON_USAGE_* values passed below do not change how
 * long the wait lasts.
 *
 * Returns the CPU pointer from radeon_bo_do_map(), or NULL as described
 * above or when the mapping itself fails.
 */
static void *radeon_bo_map(struct pb_buffer *buf,
                           struct radeon_cmdbuf *rcs,
                           enum pipe_map_flags usage)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_MAP_DONTBLOCK) {
         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            /* Zero timeout: query only, never block. */
            if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            /* Mapping for write: any reference from the CS is a conflict. */
            if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            /* Zero timeout: query only, never block. */
            if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         /* Blocking path: measure how long the flush and wait take. */
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
            }
            radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in radeon_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     radeon_drm_cs_sync_flush(rcs);
               }
            }

            radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->rws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   return radeon_bo_do_map(bo);
}
580 
radeon_bo_unmap(struct pb_buffer * _buf)581 static void radeon_bo_unmap(struct pb_buffer *_buf)
582 {
583    struct radeon_bo *bo = (struct radeon_bo*)_buf;
584 
585    if (bo->user_ptr)
586       return;
587 
588    if (!bo->handle)
589       bo = bo->u.slab.real;
590 
591    mtx_lock(&bo->u.real.map_mutex);
592    if (!bo->u.real.ptr) {
593       mtx_unlock(&bo->u.real.map_mutex);
594       return; /* it's not been mapped */
595    }
596 
597    assert(bo->u.real.map_count);
598    if (--bo->u.real.map_count) {
599       mtx_unlock(&bo->u.real.map_mutex);
600       return; /* it's been mapped multiple times */
601    }
602 
603    os_munmap(bo->u.real.ptr, bo->base.size);
604    bo->u.real.ptr = NULL;
605 
606    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
607       bo->rws->mapped_vram -= bo->base.size;
608    else
609       bo->rws->mapped_gtt -= bo->base.size;
610    bo->rws->num_mapped_buffers--;
611 
612    mtx_unlock(&bo->u.real.map_mutex);
613 }
614 
/* Virtual table installed on real (handle-backed) buffers by
 * radeon_create_bo(). Only the first slot is populated; it routes the
 * buffer either into the reuse cache or to radeon_bo_destroy().
 */
static const struct pb_vtbl radeon_bo_vtbl = {
   radeon_bo_destroy_or_cache
   /* other functions are never called */
};
619 
radeon_create_bo(struct radeon_drm_winsys * rws,unsigned size,unsigned alignment,unsigned initial_domains,unsigned flags,int heap)620 static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
621                                           unsigned size, unsigned alignment,
622                                           unsigned initial_domains,
623                                           unsigned flags,
624                                           int heap)
625 {
626    struct radeon_bo *bo;
627    struct drm_radeon_gem_create args;
628    int r;
629 
630    memset(&args, 0, sizeof(args));
631 
632    assert(initial_domains);
633    assert((initial_domains &
634            ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
635 
636    args.size = size;
637    args.alignment = alignment;
638    args.initial_domain = initial_domains;
639    args.flags = 0;
640 
641    /* If VRAM is just stolen system memory, allow both VRAM and
642     * GTT, whichever has free space. If a buffer is evicted from
643     * VRAM to GTT, it will stay there.
644     */
645    if (!rws->info.has_dedicated_vram)
646       args.initial_domain |= RADEON_DOMAIN_GTT;
647 
648    if (flags & RADEON_FLAG_GTT_WC)
649       args.flags |= RADEON_GEM_GTT_WC;
650    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
651       args.flags |= RADEON_GEM_NO_CPU_ACCESS;
652 
653    if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
654                            &args, sizeof(args))) {
655       fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
656       fprintf(stderr, "radeon:    size      : %u bytes\n", size);
657       fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
658       fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
659       fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
660       return NULL;
661    }
662 
663    assert(args.handle != 0);
664 
665    bo = CALLOC_STRUCT(radeon_bo);
666    if (!bo)
667       return NULL;
668 
669    pipe_reference_init(&bo->base.reference, 1);
670    bo->base.alignment = alignment;
671    bo->base.usage = 0;
672    bo->base.size = size;
673    bo->base.vtbl = &radeon_bo_vtbl;
674    bo->rws = rws;
675    bo->handle = args.handle;
676    bo->va = 0;
677    bo->initial_domain = initial_domains;
678    bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
679    (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
680 
681    if (heap >= 0) {
682       pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
683                           heap);
684    }
685 
686    if (rws->info.r600_has_virtual_memory) {
687       struct drm_radeon_gem_va va;
688       unsigned va_gap_size;
689 
690       va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
691 
692       if (flags & RADEON_FLAG_32BIT) {
693          bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
694                                        size + va_gap_size, alignment);
695          assert(bo->va + size < rws->vm32.end);
696       } else {
697          bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
698       }
699 
700       va.handle = bo->handle;
701       va.vm_id = 0;
702       va.operation = RADEON_VA_MAP;
703       va.flags = RADEON_VM_PAGE_READABLE |
704                  RADEON_VM_PAGE_WRITEABLE |
705                  RADEON_VM_PAGE_SNOOPED;
706       va.offset = bo->va;
707       r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
708       if (r && va.operation == RADEON_VA_RESULT_ERROR) {
709          fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
710          fprintf(stderr, "radeon:    size      : %d bytes\n", size);
711          fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
712          fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
713          fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
714          radeon_bo_destroy(&bo->base);
715          return NULL;
716       }
717       mtx_lock(&rws->bo_handles_mutex);
718       if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
719          struct pb_buffer *b = &bo->base;
720          struct radeon_bo *old_bo =
721                _mesa_hash_table_u64_search(rws->bo_vas, va.offset);
722 
723          mtx_unlock(&rws->bo_handles_mutex);
724          pb_reference(&b, &old_bo->base);
725          return radeon_bo(b);
726       }
727 
728       _mesa_hash_table_u64_insert(rws->bo_vas, bo->va, bo);
729       mtx_unlock(&rws->bo_handles_mutex);
730    }
731 
732    if (initial_domains & RADEON_DOMAIN_VRAM)
733       rws->allocated_vram += align(size, rws->info.gart_page_size);
734    else if (initial_domains & RADEON_DOMAIN_GTT)
735       rws->allocated_gtt += align(size, rws->info.gart_page_size);
736 
737    return bo;
738 }
739 
radeon_bo_can_reclaim(struct pb_buffer * _buf)740 bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
741 {
742    struct radeon_bo *bo = radeon_bo(_buf);
743 
744    if (radeon_bo_is_referenced_by_any_cs(bo))
745       return false;
746 
747    return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
748 }
749 
radeon_bo_can_reclaim_slab(void * priv,struct pb_slab_entry * entry)750 bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
751 {
752    struct radeon_bo *bo = NULL; /* fix container_of */
753    bo = container_of(entry, bo, u.slab.entry);
754 
755    return radeon_bo_can_reclaim(&bo->base);
756 }
757 
radeon_bo_slab_destroy(struct pb_buffer * _buf)758 static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
759 {
760    struct radeon_bo *bo = radeon_bo(_buf);
761 
762    assert(!bo->handle);
763 
764    pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
765 }
766 
/* Virtual table installed on slab entries by radeon_bo_slab_alloc(). Only
 * the first slot is populated; it returns the entry to the slab allocator.
 */
static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
   radeon_bo_slab_destroy
   /* other functions are never called */
};
771 
radeon_bo_slab_alloc(void * priv,unsigned heap,unsigned entry_size,unsigned group_index)772 struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
773                                      unsigned entry_size,
774                                      unsigned group_index)
775 {
776    struct radeon_drm_winsys *ws = priv;
777    struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
778    enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
779    enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
780    unsigned base_hash;
781 
782    if (!slab)
783       return NULL;
784 
785    slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
786                                                     64 * 1024, 64 * 1024,
787                                                     domains, flags));
788    if (!slab->buffer)
789       goto fail;
790 
791    assert(slab->buffer->handle);
792 
793    slab->base.num_entries = slab->buffer->base.size / entry_size;
794    slab->base.num_free = slab->base.num_entries;
795    slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
796    if (!slab->entries)
797       goto fail_buffer;
798 
799    list_inithead(&slab->base.free);
800 
801    base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
802 
803    for (unsigned i = 0; i < slab->base.num_entries; ++i) {
804       struct radeon_bo *bo = &slab->entries[i];
805 
806       bo->base.alignment = entry_size;
807       bo->base.usage = slab->buffer->base.usage;
808       bo->base.size = entry_size;
809       bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
810       bo->rws = ws;
811       bo->va = slab->buffer->va + i * entry_size;
812       bo->initial_domain = domains;
813       bo->hash = base_hash + i;
814       bo->u.slab.entry.slab = &slab->base;
815       bo->u.slab.entry.group_index = group_index;
816       bo->u.slab.real = slab->buffer;
817 
818       list_addtail(&bo->u.slab.entry.head, &slab->base.free);
819    }
820 
821    return &slab->base;
822 
823 fail_buffer:
824    radeon_bo_reference(&slab->buffer, NULL);
825 fail:
826    FREE(slab);
827    return NULL;
828 }
829 
radeon_bo_slab_free(void * priv,struct pb_slab * pslab)830 void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
831 {
832    struct radeon_slab *slab = (struct radeon_slab *)pslab;
833 
834    for (unsigned i = 0; i < slab->base.num_entries; ++i) {
835       struct radeon_bo *bo = &slab->entries[i];
836       for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
837          radeon_bo_reference(&bo->u.slab.fences[j], NULL);
838       FREE(bo->u.slab.fences);
839    }
840 
841    FREE(slab->entries);
842    radeon_bo_reference(&slab->buffer, NULL);
843    FREE(slab);
844 }
845 
/* Decode a tile-split field value (0..6) into its numeric tile-split value:
 * 0 -> 64, 1 -> 128, ... 6 -> 4096. Any value outside that range decodes
 * to 1024, the same as field value 4.
 */
static unsigned eg_tile_split(unsigned tile_split)
{
   static const unsigned decoded[] = {
      64, 128, 256, 512, 1024, 2048, 4096
   };

   if (tile_split < sizeof(decoded) / sizeof(decoded[0]))
      return decoded[tile_split];

   return 1024;
}
860 
/* Encode a numeric tile-split value back into its field value; the inverse
 * of eg_tile_split(): 64 -> 0, 128 -> 1, ... 4096 -> 6. Any value that is
 * not one of those seven encodes as 4, the same as 1024.
 */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   unsigned code;

   for (code = 0; code <= 6; ++code) {
      if (eg_tile_split == (64u << code))
         return code;
   }

   return 4;
}
874 
radeon_bo_get_metadata(struct pb_buffer * _buf,struct radeon_bo_metadata * md,struct radeon_surf * surf)875 static void radeon_bo_get_metadata(struct pb_buffer *_buf,
876                                    struct radeon_bo_metadata *md,
877                                    struct radeon_surf *surf)
878 {
879    struct radeon_bo *bo = radeon_bo(_buf);
880    struct drm_radeon_gem_set_tiling args;
881 
882    assert(bo->handle && "must not be called for slab entries");
883 
884    memset(&args, 0, sizeof(args));
885 
886    args.handle = bo->handle;
887 
888    drmCommandWriteRead(bo->rws->fd,
889                        DRM_RADEON_GEM_GET_TILING,
890                        &args,
891                        sizeof(args));
892 
893    if (surf) {
894       if (args.tiling_flags & RADEON_TILING_MACRO)
895          md->mode = RADEON_SURF_MODE_2D;
896       else if (args.tiling_flags & RADEON_TILING_MICRO)
897          md->mode = RADEON_SURF_MODE_1D;
898       else
899          md->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
900 
901       surf->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
902       surf->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
903       surf->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
904       surf->u.legacy.tile_split = eg_tile_split(surf->u.legacy.tile_split);
905       surf->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
906 
907       if (bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT))
908          surf->flags |= RADEON_SURF_SCANOUT;
909       else
910          surf->flags &= ~RADEON_SURF_SCANOUT;
911       return;
912    }
913 
914    md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
915    md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
916    if (args.tiling_flags & RADEON_TILING_MICRO)
917       md->u.legacy.microtile = RADEON_LAYOUT_TILED;
918    else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
919       md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;
920 
921    if (args.tiling_flags & RADEON_TILING_MACRO)
922       md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
923 
924    md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
925    md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
926    md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
927    md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
928    md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
929    md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
930 }
931 
radeon_bo_set_metadata(struct pb_buffer * _buf,struct radeon_bo_metadata * md,struct radeon_surf * surf)932 static void radeon_bo_set_metadata(struct pb_buffer *_buf,
933                                    struct radeon_bo_metadata *md,
934                                    struct radeon_surf *surf)
935 {
936    struct radeon_bo *bo = radeon_bo(_buf);
937    struct drm_radeon_gem_set_tiling args;
938 
939    assert(bo->handle && "must not be called for slab entries");
940 
941    memset(&args, 0, sizeof(args));
942 
943    os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
944 
945    if (surf) {
946       if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
947          args.tiling_flags |= RADEON_TILING_MICRO;
948       if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
949          args.tiling_flags |= RADEON_TILING_MACRO;
950 
951       args.tiling_flags |= (surf->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
952                            RADEON_TILING_EG_BANKW_SHIFT;
953       args.tiling_flags |= (surf->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
954                            RADEON_TILING_EG_BANKH_SHIFT;
955       if (surf->u.legacy.tile_split) {
956          args.tiling_flags |= (eg_tile_split_rev(surf->u.legacy.tile_split) &
957                                RADEON_TILING_EG_TILE_SPLIT_MASK) <<
958                               RADEON_TILING_EG_TILE_SPLIT_SHIFT;
959       }
960       args.tiling_flags |= (surf->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
961                            RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
962 
963       if (bo->rws->gen >= DRV_SI && !(surf->flags & RADEON_SURF_SCANOUT))
964          args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
965 
966       args.pitch = surf->u.legacy.level[0].nblk_x * surf->bpe;
967    } else {
968       if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
969          args.tiling_flags |= RADEON_TILING_MICRO;
970       else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
971          args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
972 
973       if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
974          args.tiling_flags |= RADEON_TILING_MACRO;
975 
976       args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
977                            RADEON_TILING_EG_BANKW_SHIFT;
978       args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
979                            RADEON_TILING_EG_BANKH_SHIFT;
980       if (md->u.legacy.tile_split) {
981          args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
982                                RADEON_TILING_EG_TILE_SPLIT_MASK) <<
983                               RADEON_TILING_EG_TILE_SPLIT_SHIFT;
984       }
985       args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
986                            RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
987 
988       if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
989          args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
990 
991       args.pitch = md->u.legacy.stride;
992    }
993 
994    args.handle = bo->handle;
995 
996    drmCommandWriteRead(bo->rws->fd,
997                        DRM_RADEON_GEM_SET_TILING,
998                        &args,
999                        sizeof(args));
1000 }
1001 
1002 static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys * rws,uint64_t size,unsigned alignment,enum radeon_bo_domain domain,enum radeon_bo_flag flags)1003 radeon_winsys_bo_create(struct radeon_winsys *rws,
1004                         uint64_t size,
1005                         unsigned alignment,
1006                         enum radeon_bo_domain domain,
1007                         enum radeon_bo_flag flags)
1008 {
1009    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1010    struct radeon_bo *bo;
1011    int heap = -1;
1012 
1013    assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
1014 
1015    /* Only 32-bit sizes are supported. */
1016    if (size > UINT_MAX)
1017       return NULL;
1018 
1019    /* VRAM implies WC. This is not optional. */
1020    if (domain & RADEON_DOMAIN_VRAM)
1021       flags |= RADEON_FLAG_GTT_WC;
1022    /* NO_CPU_ACCESS is valid with VRAM only. */
1023    if (domain != RADEON_DOMAIN_VRAM)
1024       flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
1025 
1026    /* Sub-allocate small buffers from slabs. */
1027    if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
1028        size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
1029        ws->info.r600_has_virtual_memory &&
1030        alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
1031       struct pb_slab_entry *entry;
1032       int heap = radeon_get_heap_index(domain, flags);
1033 
1034       if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
1035          goto no_slab;
1036 
1037       entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
1038       if (!entry) {
1039          /* Clear the cache and try again. */
1040          pb_cache_release_all_buffers(&ws->bo_cache);
1041 
1042          entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
1043       }
1044       if (!entry)
1045          return NULL;
1046 
1047       bo = NULL;
1048       bo = container_of(entry, bo, u.slab.entry);
1049 
1050       pipe_reference_init(&bo->base.reference, 1);
1051 
1052       return &bo->base;
1053    }
1054 no_slab:
1055 
1056    /* This flag is irrelevant for the cache. */
1057    flags &= ~RADEON_FLAG_NO_SUBALLOC;
1058 
1059    /* Align size to page size. This is the minimum alignment for normal
1060     * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
1061     * like constant/uniform buffers, can benefit from better and more reuse.
1062     */
1063    size = align(size, ws->info.gart_page_size);
1064    alignment = align(alignment, ws->info.gart_page_size);
1065 
1066    bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
1067 
1068    /* Shared resources don't use cached heaps. */
1069    if (use_reusable_pool) {
1070       heap = radeon_get_heap_index(domain, flags);
1071       assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
1072 
1073       bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
1074                                              0, heap));
1075       if (bo)
1076          return &bo->base;
1077    }
1078 
1079    bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
1080    if (!bo) {
1081       /* Clear the cache and try again. */
1082       if (ws->info.r600_has_virtual_memory)
1083          pb_slabs_reclaim(&ws->bo_slabs);
1084       pb_cache_release_all_buffers(&ws->bo_cache);
1085       bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
1086       if (!bo)
1087          return NULL;
1088    }
1089 
1090    bo->u.real.use_reusable_pool = use_reusable_pool;
1091 
1092    mtx_lock(&ws->bo_handles_mutex);
1093    _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1094    mtx_unlock(&ws->bo_handles_mutex);
1095 
1096    return &bo->base;
1097 }
1098 
radeon_winsys_bo_from_ptr(struct radeon_winsys * rws,void * pointer,uint64_t size)1099 static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
1100                                                    void *pointer, uint64_t size)
1101 {
1102    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1103    struct drm_radeon_gem_userptr args;
1104    struct radeon_bo *bo;
1105    int r;
1106 
1107    bo = CALLOC_STRUCT(radeon_bo);
1108    if (!bo)
1109       return NULL;
1110 
1111    memset(&args, 0, sizeof(args));
1112    args.addr = (uintptr_t)pointer;
1113    args.size = align(size, ws->info.gart_page_size);
1114    args.flags = RADEON_GEM_USERPTR_ANONONLY |
1115                 RADEON_GEM_USERPTR_VALIDATE |
1116                 RADEON_GEM_USERPTR_REGISTER;
1117    if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
1118                            &args, sizeof(args))) {
1119       FREE(bo);
1120       return NULL;
1121    }
1122 
1123    assert(args.handle != 0);
1124 
1125    mtx_lock(&ws->bo_handles_mutex);
1126 
1127    /* Initialize it. */
1128    pipe_reference_init(&bo->base.reference, 1);
1129    bo->handle = args.handle;
1130    bo->base.alignment = 0;
1131    bo->base.size = size;
1132    bo->base.vtbl = &radeon_bo_vtbl;
1133    bo->rws = ws;
1134    bo->user_ptr = pointer;
1135    bo->va = 0;
1136    bo->initial_domain = RADEON_DOMAIN_GTT;
1137    bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1138    (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1139 
1140    _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1141 
1142    mtx_unlock(&ws->bo_handles_mutex);
1143 
1144    if (ws->info.r600_has_virtual_memory) {
1145       struct drm_radeon_gem_va va;
1146 
1147       bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
1148 
1149       va.handle = bo->handle;
1150       va.operation = RADEON_VA_MAP;
1151       va.vm_id = 0;
1152       va.offset = bo->va;
1153       va.flags = RADEON_VM_PAGE_READABLE |
1154                  RADEON_VM_PAGE_WRITEABLE |
1155                  RADEON_VM_PAGE_SNOOPED;
1156       va.offset = bo->va;
1157       r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1158       if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1159          fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1160          radeon_bo_destroy(&bo->base);
1161          return NULL;
1162       }
1163       mtx_lock(&ws->bo_handles_mutex);
1164       if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1165          struct pb_buffer *b = &bo->base;
1166          struct radeon_bo *old_bo =
1167                _mesa_hash_table_u64_search(ws->bo_vas, va.offset);
1168 
1169          mtx_unlock(&ws->bo_handles_mutex);
1170          pb_reference(&b, &old_bo->base);
1171          return b;
1172       }
1173 
1174       _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
1175       mtx_unlock(&ws->bo_handles_mutex);
1176    }
1177 
1178    ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1179 
1180    return (struct pb_buffer*)bo;
1181 }
1182 
/**
 * Import a buffer from a winsys handle.
 *
 * Two handle types are accepted: WINSYS_HANDLE_TYPE_SHARED (whandle->handle
 * is used as a flink name) and WINSYS_HANDLE_TYPE_FD (whandle->handle is
 * used as a file descriptor).  Any other type fails.
 *
 * If a buffer object is already registered for the name / GEM handle, that
 * object is returned with an additional reference.  Otherwise a new object
 * is created and registered in ws->bo_handles (and in ws->bo_names when it
 * has a flink name).
 *
 * \param rws           winsys
 * \param whandle       handle to import
 * \param vm_alignment  alignment passed to radeon_bomgr_find_va64() when a
 *                      virtual address has to be assigned
 * \return the buffer, or NULL on failure
 */
static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
                                                      struct winsys_handle *whandle,
                                                      unsigned vm_alignment)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int r;
   unsigned handle;
   uint64_t size = 0;

   /* We must maintain a list of pairs <handle, bo>, so that we always return
    * the same BO for one particular handle. If we didn't do that and created
    * more than one BO for the same handle and then relocated them in a CS,
    * we would hit a deadlock in the kernel.
    *
    * The list of pairs is guarded by a mutex, of course. */
   mtx_lock(&ws->bo_handles_mutex);

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      /* First check if there already is an existing bo for the handle. */
      bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* We must first get the GEM handle, as fds are unreliable keys */
      r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
      if (r)
         goto fail;
      bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
   } else {
      /* Unknown handle type */
      goto fail;
   }

   if (bo) {
      /* Increase the refcount. */
      struct pb_buffer *b = NULL;
      pb_reference(&b, &bo->base);
      /* Everything after the 'done' label also runs for this existing BO. */
      goto done;
   }

   /* There isn't, create a new one. */
   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo) {
      /* NOTE(review): for the FD type, the GEM handle obtained from
       * drmPrimeFDToHandle() above does not appear to be released on this
       * path - confirm whether that is intended. */
      goto fail;
   }

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      struct drm_gem_open open_arg = {};
      memset(&open_arg, 0, sizeof(open_arg));
      /* Open the BO. */
      open_arg.name = whandle->handle;
      if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
         FREE(bo);
         goto fail;
      }
      handle = open_arg.handle;
      size = open_arg.size;
      bo->flink_name = whandle->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* 'handle' was already filled in by drmPrimeFDToHandle() above; only
       * the size is still missing and is taken from the end offset of the
       * file descriptor. */
      size = lseek(whandle->handle, 0, SEEK_END);
      /*
       * Could check errno to determine whether the kernel is new enough, but
       * it doesn't really matter why this failed, just that it failed.
       */
      if (size == (off_t)-1) {
         /* NOTE(review): the GEM handle obtained from drmPrimeFDToHandle()
          * does not appear to be released on this path - confirm whether
          * that is intended. */
         FREE(bo);
         goto fail;
      }
      /* Rewind the descriptor's offset back to the start. */
      lseek(whandle->handle, 0, SEEK_SET);
   }

   assert(handle != 0);

   bo->handle = handle;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = 0;
   /* The 64-bit size is narrowed by the cast below. */
   bo->base.size = (unsigned) size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = ws;
   bo->va = 0;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   /* Only buffers imported by flink name are added to the name table. */
   if (bo->flink_name)
      _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

done:
   mtx_unlock(&ws->bo_handles_mutex);

   /* Assign a virtual address if the buffer does not have one yet. */
   if (ws->info.r600_has_virtual_memory && !bo->va) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(&bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         /* The kernel reports an existing mapping: look up the buffer
          * registered at the returned offset and switch the result over to
          * it via pb_reference().
          * NOTE(review): old_bo is used without a NULL check - presumably
          * the lookup cannot miss here; confirm. */
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

   /* NOTE(review): this accounting is also reached through 'done' for a BO
    * that was already in the table, so the counter is incremented again on
    * every repeated import - confirm this is intended. */
   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer*)bo;

fail:
   mtx_unlock(&ws->bo_handles_mutex);
   return NULL;
}
1322 
radeon_winsys_bo_get_handle(struct radeon_winsys * rws,struct pb_buffer * buffer,struct winsys_handle * whandle)1323 static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
1324                                         struct pb_buffer *buffer,
1325                                         struct winsys_handle *whandle)
1326 {
1327    struct drm_gem_flink flink;
1328    struct radeon_bo *bo = radeon_bo(buffer);
1329    struct radeon_drm_winsys *ws = bo->rws;
1330 
1331    /* Don't allow exports of slab entries. */
1332    if (!bo->handle)
1333       return false;
1334 
1335    memset(&flink, 0, sizeof(flink));
1336 
1337    bo->u.real.use_reusable_pool = false;
1338 
1339    if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
1340       if (!bo->flink_name) {
1341          flink.handle = bo->handle;
1342 
1343          if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
1344             return false;
1345          }
1346 
1347          bo->flink_name = flink.name;
1348 
1349          mtx_lock(&ws->bo_handles_mutex);
1350          _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1351          mtx_unlock(&ws->bo_handles_mutex);
1352       }
1353       whandle->handle = bo->flink_name;
1354    } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
1355       whandle->handle = bo->handle;
1356    } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
1357       if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
1358          return false;
1359    }
1360 
1361    return true;
1362 }
1363 
radeon_winsys_bo_is_user_ptr(struct pb_buffer * buf)1364 static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
1365 {
1366    return ((struct radeon_bo*)buf)->user_ptr != NULL;
1367 }
1368 
radeon_winsys_bo_is_suballocated(struct pb_buffer * buf)1369 static bool radeon_winsys_bo_is_suballocated(struct pb_buffer *buf)
1370 {
1371    return !((struct radeon_bo*)buf)->handle;
1372 }
1373 
radeon_winsys_bo_va(struct pb_buffer * buf)1374 static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
1375 {
1376    return ((struct radeon_bo*)buf)->va;
1377 }
1378 
radeon_winsys_bo_get_reloc_offset(struct pb_buffer * buf)1379 static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
1380 {
1381    struct radeon_bo *bo = radeon_bo(buf);
1382 
1383    if (bo->handle)
1384       return 0;
1385 
1386    return bo->va - bo->u.slab.real->va;
1387 }
1388 
radeon_drm_bo_init_functions(struct radeon_drm_winsys * ws)1389 void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
1390 {
1391    ws->base.buffer_set_metadata = radeon_bo_set_metadata;
1392    ws->base.buffer_get_metadata = radeon_bo_get_metadata;
1393    ws->base.buffer_map = radeon_bo_map;
1394    ws->base.buffer_unmap = radeon_bo_unmap;
1395    ws->base.buffer_wait = radeon_bo_wait;
1396    ws->base.buffer_create = radeon_winsys_bo_create;
1397    ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
1398    ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
1399    ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
1400    ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
1401    ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
1402    ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
1403    ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
1404    ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
1405 }
1406