1 /*
2  * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 #include "radeon_drm_cs.h"
28 
29 #include "util/u_hash_table.h"
30 #include "util/u_memory.h"
31 #include "util/simple_list.h"
32 #include "os/os_thread.h"
33 #include "os/os_mman.h"
34 #include "util/os_time.h"
35 
36 #include "state_tracker/drm_driver.h"
37 
38 #include <sys/ioctl.h>
39 #include <xf86drm.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <inttypes.h>
44 
/* Forward declaration. radeon_bo_slab_alloc() below calls this to obtain the
 * backing buffer of a slab; the definition is not in this part of the file.
 */
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws, uint64_t size,
                        unsigned alignment, enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);
51 
/* Reinterpret a generic pb_buffer pointer as the radeon winsys buffer
 * object. This is a plain pointer cast; no checking is performed.
 */
static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
    struct radeon_bo *rbo = (struct radeon_bo *)bo;

    return rbo;
}
56 
57 struct radeon_bo_va_hole {
58     struct list_head list;
59     uint64_t         offset;
60     uint64_t         size;
61 };
62 
radeon_real_bo_is_busy(struct radeon_bo * bo)63 static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
64 {
65     struct drm_radeon_gem_busy args = {0};
66 
67     args.handle = bo->handle;
68     return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
69                                &args, sizeof(args)) != 0;
70 }
71 
radeon_bo_is_busy(struct radeon_bo * bo)72 static bool radeon_bo_is_busy(struct radeon_bo *bo)
73 {
74     unsigned num_idle;
75     bool busy = false;
76 
77     if (bo->handle)
78         return radeon_real_bo_is_busy(bo);
79 
80     mtx_lock(&bo->rws->bo_fence_lock);
81     for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
82         if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
83             busy = true;
84             break;
85         }
86         radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
87     }
88     memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
89             (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
90     bo->u.slab.num_fences -= num_idle;
91     mtx_unlock(&bo->rws->bo_fence_lock);
92 
93     return busy;
94 }
95 
radeon_real_bo_wait_idle(struct radeon_bo * bo)96 static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
97 {
98     struct drm_radeon_gem_wait_idle args = {0};
99 
100     args.handle = bo->handle;
101     while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
102                            &args, sizeof(args)) == -EBUSY);
103 }
104 
radeon_bo_wait_idle(struct radeon_bo * bo)105 static void radeon_bo_wait_idle(struct radeon_bo *bo)
106 {
107     if (bo->handle) {
108         radeon_real_bo_wait_idle(bo);
109     } else {
110         mtx_lock(&bo->rws->bo_fence_lock);
111         while (bo->u.slab.num_fences) {
112             struct radeon_bo *fence = NULL;
113             radeon_bo_reference(&fence, bo->u.slab.fences[0]);
114             mtx_unlock(&bo->rws->bo_fence_lock);
115 
116             /* Wait without holding the fence lock. */
117             radeon_real_bo_wait_idle(fence);
118 
119             mtx_lock(&bo->rws->bo_fence_lock);
120             if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
121                 radeon_bo_reference(&bo->u.slab.fences[0], NULL);
122                 memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
123                         (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
124                 bo->u.slab.num_fences--;
125             }
126             radeon_bo_reference(&fence, NULL);
127         }
128         mtx_unlock(&bo->rws->bo_fence_lock);
129     }
130 }
131 
radeon_bo_wait(struct pb_buffer * _buf,uint64_t timeout,enum radeon_bo_usage usage)132 static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
133                            enum radeon_bo_usage usage)
134 {
135     struct radeon_bo *bo = radeon_bo(_buf);
136     int64_t abs_timeout;
137 
138     /* No timeout. Just query. */
139     if (timeout == 0)
140         return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
141 
142     abs_timeout = os_time_get_absolute_timeout(timeout);
143 
144     /* Wait if any ioctl is being submitted with this buffer. */
145     if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
146         return false;
147 
148     /* Infinite timeout. */
149     if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
150         radeon_bo_wait_idle(bo);
151         return true;
152     }
153 
154     /* Other timeouts need to be emulated with a loop. */
155     while (radeon_bo_is_busy(bo)) {
156        if (os_time_get_nano() >= abs_timeout)
157           return false;
158        os_time_sleep(10);
159     }
160 
161     return true;
162 }
163 
get_valid_domain(enum radeon_bo_domain domain)164 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
165 {
166     /* Zero domains the driver doesn't understand. */
167     domain &= RADEON_DOMAIN_VRAM_GTT;
168 
169     /* If no domain is set, we must set something... */
170     if (!domain)
171         domain = RADEON_DOMAIN_VRAM_GTT;
172 
173     return domain;
174 }
175 
radeon_bo_get_initial_domain(struct pb_buffer * buf)176 static enum radeon_bo_domain radeon_bo_get_initial_domain(
177 		struct pb_buffer *buf)
178 {
179     struct radeon_bo *bo = (struct radeon_bo*)buf;
180     struct drm_radeon_gem_op args;
181 
182     if (bo->rws->info.drm_minor < 38)
183         return RADEON_DOMAIN_VRAM_GTT;
184 
185     memset(&args, 0, sizeof(args));
186     args.handle = bo->handle;
187     args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
188 
189     if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
190                             &args, sizeof(args))) {
191         fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
192                 bo, bo->handle);
193         /* Default domain as returned by get_valid_domain. */
194         return RADEON_DOMAIN_VRAM_GTT;
195     }
196 
197     /* GEM domains and winsys domains are defined the same. */
198     return get_valid_domain(args.value);
199 }
200 
radeon_bomgr_find_va(struct radeon_drm_winsys * rws,uint64_t size,uint64_t alignment)201 static uint64_t radeon_bomgr_find_va(struct radeon_drm_winsys *rws,
202                                      uint64_t size, uint64_t alignment)
203 {
204     struct radeon_bo_va_hole *hole, *n;
205     uint64_t offset = 0, waste = 0;
206 
207     /* All VM address space holes will implicitly start aligned to the
208      * size alignment, so we don't need to sanitize the alignment here
209      */
210     size = align(size, rws->info.gart_page_size);
211 
212     mtx_lock(&rws->bo_va_mutex);
213     /* first look for a hole */
214     LIST_FOR_EACH_ENTRY_SAFE(hole, n, &rws->va_holes, list) {
215         offset = hole->offset;
216         waste = offset % alignment;
217         waste = waste ? alignment - waste : 0;
218         offset += waste;
219         if (offset >= (hole->offset + hole->size)) {
220             continue;
221         }
222         if (!waste && hole->size == size) {
223             offset = hole->offset;
224             list_del(&hole->list);
225             FREE(hole);
226             mtx_unlock(&rws->bo_va_mutex);
227             return offset;
228         }
229         if ((hole->size - waste) > size) {
230             if (waste) {
231                 n = CALLOC_STRUCT(radeon_bo_va_hole);
232                 n->size = waste;
233                 n->offset = hole->offset;
234                 list_add(&n->list, &hole->list);
235             }
236             hole->size -= (size + waste);
237             hole->offset += size + waste;
238             mtx_unlock(&rws->bo_va_mutex);
239             return offset;
240         }
241         if ((hole->size - waste) == size) {
242             hole->size = waste;
243             mtx_unlock(&rws->bo_va_mutex);
244             return offset;
245         }
246     }
247 
248     offset = rws->va_offset;
249     waste = offset % alignment;
250     waste = waste ? alignment - waste : 0;
251     if (waste) {
252         n = CALLOC_STRUCT(radeon_bo_va_hole);
253         n->size = waste;
254         n->offset = offset;
255         list_add(&n->list, &rws->va_holes);
256     }
257     offset += waste;
258     rws->va_offset += size + waste;
259     mtx_unlock(&rws->bo_va_mutex);
260     return offset;
261 }
262 
radeon_bomgr_free_va(struct radeon_drm_winsys * rws,uint64_t va,uint64_t size)263 static void radeon_bomgr_free_va(struct radeon_drm_winsys *rws,
264                                  uint64_t va, uint64_t size)
265 {
266     struct radeon_bo_va_hole *hole = NULL;
267 
268     size = align(size, rws->info.gart_page_size);
269 
270     mtx_lock(&rws->bo_va_mutex);
271     if ((va + size) == rws->va_offset) {
272         rws->va_offset = va;
273         /* Delete uppermost hole if it reaches the new top */
274         if (!LIST_IS_EMPTY(&rws->va_holes)) {
275             hole = container_of(rws->va_holes.next, hole, list);
276             if ((hole->offset + hole->size) == va) {
277                 rws->va_offset = hole->offset;
278                 list_del(&hole->list);
279                 FREE(hole);
280             }
281         }
282     } else {
283         struct radeon_bo_va_hole *next;
284 
285         hole = container_of(&rws->va_holes, hole, list);
286         LIST_FOR_EACH_ENTRY(next, &rws->va_holes, list) {
287 	    if (next->offset < va)
288 	        break;
289             hole = next;
290         }
291 
292         if (&hole->list != &rws->va_holes) {
293             /* Grow upper hole if it's adjacent */
294             if (hole->offset == (va + size)) {
295                 hole->offset = va;
296                 hole->size += size;
297                 /* Merge lower hole if it's adjacent */
298                 if (next != hole && &next->list != &rws->va_holes &&
299                     (next->offset + next->size) == va) {
300                     next->size += hole->size;
301                     list_del(&hole->list);
302                     FREE(hole);
303                 }
304                 goto out;
305             }
306         }
307 
308         /* Grow lower hole if it's adjacent */
309         if (next != hole && &next->list != &rws->va_holes &&
310             (next->offset + next->size) == va) {
311             next->size += size;
312             goto out;
313         }
314 
315         /* FIXME on allocation failure we just lose virtual address space
316          * maybe print a warning
317          */
318         next = CALLOC_STRUCT(radeon_bo_va_hole);
319         if (next) {
320             next->size = size;
321             next->offset = va;
322             list_add(&next->list, &hole->list);
323         }
324     }
325 out:
326     mtx_unlock(&rws->bo_va_mutex);
327 }
328 
radeon_bo_destroy(struct pb_buffer * _buf)329 void radeon_bo_destroy(struct pb_buffer *_buf)
330 {
331     struct radeon_bo *bo = radeon_bo(_buf);
332     struct radeon_drm_winsys *rws = bo->rws;
333     struct drm_gem_close args;
334 
335     assert(bo->handle && "must not be called for slab entries");
336 
337     memset(&args, 0, sizeof(args));
338 
339     mtx_lock(&rws->bo_handles_mutex);
340     util_hash_table_remove(rws->bo_handles, (void*)(uintptr_t)bo->handle);
341     if (bo->flink_name) {
342         util_hash_table_remove(rws->bo_names,
343                                (void*)(uintptr_t)bo->flink_name);
344     }
345     mtx_unlock(&rws->bo_handles_mutex);
346 
347     if (bo->u.real.ptr)
348         os_munmap(bo->u.real.ptr, bo->base.size);
349 
350     if (rws->info.has_virtual_memory) {
351         if (rws->va_unmap_working) {
352             struct drm_radeon_gem_va va;
353 
354             va.handle = bo->handle;
355             va.vm_id = 0;
356             va.operation = RADEON_VA_UNMAP;
357             va.flags = RADEON_VM_PAGE_READABLE |
358                        RADEON_VM_PAGE_WRITEABLE |
359                        RADEON_VM_PAGE_SNOOPED;
360             va.offset = bo->va;
361 
362             if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
363 				    sizeof(va)) != 0 &&
364 		va.operation == RADEON_VA_RESULT_ERROR) {
365                 fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
366                 fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
367                 fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
368             }
369 	}
370 
371 	radeon_bomgr_free_va(rws, bo->va, bo->base.size);
372     }
373 
374     /* Close object. */
375     args.handle = bo->handle;
376     drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
377 
378     mtx_destroy(&bo->u.real.map_mutex);
379 
380     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
381         rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
382     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
383         rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);
384 
385     if (bo->u.real.map_count >= 1) {
386         if (bo->initial_domain & RADEON_DOMAIN_VRAM)
387             bo->rws->mapped_vram -= bo->base.size;
388         else
389             bo->rws->mapped_gtt -= bo->base.size;
390         bo->rws->num_mapped_buffers--;
391     }
392 
393     FREE(bo);
394 }
395 
radeon_bo_destroy_or_cache(struct pb_buffer * _buf)396 static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf)
397 {
398    struct radeon_bo *bo = radeon_bo(_buf);
399 
400     assert(bo->handle && "must not be called for slab entries");
401 
402    if (bo->u.real.use_reusable_pool)
403       pb_cache_add_buffer(&bo->u.real.cache_entry);
404    else
405       radeon_bo_destroy(_buf);
406 }
407 
radeon_bo_do_map(struct radeon_bo * bo)408 void *radeon_bo_do_map(struct radeon_bo *bo)
409 {
410     struct drm_radeon_gem_mmap args = {0};
411     void *ptr;
412     unsigned offset;
413 
414     /* If the buffer is created from user memory, return the user pointer. */
415     if (bo->user_ptr)
416         return bo->user_ptr;
417 
418     if (bo->handle) {
419         offset = 0;
420     } else {
421         offset = bo->va - bo->u.slab.real->va;
422         bo = bo->u.slab.real;
423     }
424 
425     /* Map the buffer. */
426     mtx_lock(&bo->u.real.map_mutex);
427     /* Return the pointer if it's already mapped. */
428     if (bo->u.real.ptr) {
429         bo->u.real.map_count++;
430         mtx_unlock(&bo->u.real.map_mutex);
431         return (uint8_t*)bo->u.real.ptr + offset;
432     }
433     args.handle = bo->handle;
434     args.offset = 0;
435     args.size = (uint64_t)bo->base.size;
436     if (drmCommandWriteRead(bo->rws->fd,
437                             DRM_RADEON_GEM_MMAP,
438                             &args,
439                             sizeof(args))) {
440         mtx_unlock(&bo->u.real.map_mutex);
441         fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
442                 bo, bo->handle);
443         return NULL;
444     }
445 
446     ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
447                bo->rws->fd, args.addr_ptr);
448     if (ptr == MAP_FAILED) {
449         /* Clear the cache and try again. */
450         pb_cache_release_all_buffers(&bo->rws->bo_cache);
451 
452         ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
453                       bo->rws->fd, args.addr_ptr);
454         if (ptr == MAP_FAILED) {
455             mtx_unlock(&bo->u.real.map_mutex);
456             fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
457             return NULL;
458         }
459     }
460     bo->u.real.ptr = ptr;
461     bo->u.real.map_count = 1;
462 
463     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
464        bo->rws->mapped_vram += bo->base.size;
465     else
466        bo->rws->mapped_gtt += bo->base.size;
467     bo->rws->num_mapped_buffers++;
468 
469     mtx_unlock(&bo->u.real.map_mutex);
470     return (uint8_t*)bo->u.real.ptr + offset;
471 }
472 
radeon_bo_map(struct pb_buffer * buf,struct radeon_winsys_cs * rcs,enum pipe_transfer_usage usage)473 static void *radeon_bo_map(struct pb_buffer *buf,
474                            struct radeon_winsys_cs *rcs,
475                            enum pipe_transfer_usage usage)
476 {
477     struct radeon_bo *bo = (struct radeon_bo*)buf;
478     struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
479 
480     /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
481     if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
482         /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
483         if (usage & PIPE_TRANSFER_DONTBLOCK) {
484             if (!(usage & PIPE_TRANSFER_WRITE)) {
485                 /* Mapping for read.
486                  *
487                  * Since we are mapping for read, we don't need to wait
488                  * if the GPU is using the buffer for read too
489                  * (neither one is changing it).
490                  *
491                  * Only check whether the buffer is being used for write. */
492                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
493                     cs->flush_cs(cs->flush_data, PIPE_FLUSH_ASYNC, NULL);
494                     return NULL;
495                 }
496 
497                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
498                                     RADEON_USAGE_WRITE)) {
499                     return NULL;
500                 }
501             } else {
502                 if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
503                     cs->flush_cs(cs->flush_data, PIPE_FLUSH_ASYNC, NULL);
504                     return NULL;
505                 }
506 
507                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
508                                     RADEON_USAGE_READWRITE)) {
509                     return NULL;
510                 }
511             }
512         } else {
513             uint64_t time = os_time_get_nano();
514 
515             if (!(usage & PIPE_TRANSFER_WRITE)) {
516                 /* Mapping for read.
517                  *
518                  * Since we are mapping for read, we don't need to wait
519                  * if the GPU is using the buffer for read too
520                  * (neither one is changing it).
521                  *
522                  * Only check whether the buffer is being used for write. */
523                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
524                     cs->flush_cs(cs->flush_data, 0, NULL);
525                 }
526                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
527                                RADEON_USAGE_WRITE);
528             } else {
529                 /* Mapping for write. */
530                 if (cs) {
531                     if (radeon_bo_is_referenced_by_cs(cs, bo)) {
532                         cs->flush_cs(cs->flush_data, 0, NULL);
533                     } else {
534                         /* Try to avoid busy-waiting in radeon_bo_wait. */
535                         if (p_atomic_read(&bo->num_active_ioctls))
536                             radeon_drm_cs_sync_flush(rcs);
537                     }
538                 }
539 
540                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
541                                RADEON_USAGE_READWRITE);
542             }
543 
544             bo->rws->buffer_wait_time += os_time_get_nano() - time;
545         }
546     }
547 
548     return radeon_bo_do_map(bo);
549 }
550 
radeon_bo_unmap(struct pb_buffer * _buf)551 static void radeon_bo_unmap(struct pb_buffer *_buf)
552 {
553     struct radeon_bo *bo = (struct radeon_bo*)_buf;
554 
555     if (bo->user_ptr)
556         return;
557 
558     if (!bo->handle)
559         bo = bo->u.slab.real;
560 
561     mtx_lock(&bo->u.real.map_mutex);
562     if (!bo->u.real.ptr) {
563         mtx_unlock(&bo->u.real.map_mutex);
564         return; /* it's not been mapped */
565     }
566 
567     assert(bo->u.real.map_count);
568     if (--bo->u.real.map_count) {
569         mtx_unlock(&bo->u.real.map_mutex);
570         return; /* it's been mapped multiple times */
571     }
572 
573     os_munmap(bo->u.real.ptr, bo->base.size);
574     bo->u.real.ptr = NULL;
575 
576     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
577        bo->rws->mapped_vram -= bo->base.size;
578     else
579        bo->rws->mapped_gtt -= bo->base.size;
580     bo->rws->num_mapped_buffers--;
581 
582     mtx_unlock(&bo->u.real.map_mutex);
583 }
584 
585 static const struct pb_vtbl radeon_bo_vtbl = {
586     radeon_bo_destroy_or_cache
587     /* other functions are never called */
588 };
589 
radeon_create_bo(struct radeon_drm_winsys * rws,unsigned size,unsigned alignment,unsigned usage,unsigned initial_domains,unsigned flags,unsigned pb_cache_bucket)590 static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
591                                           unsigned size, unsigned alignment,
592                                           unsigned usage,
593                                           unsigned initial_domains,
594                                           unsigned flags,
595                                           unsigned pb_cache_bucket)
596 {
597     struct radeon_bo *bo;
598     struct drm_radeon_gem_create args;
599     int r;
600 
601     memset(&args, 0, sizeof(args));
602 
603     assert(initial_domains);
604     assert((initial_domains &
605             ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
606 
607     args.size = size;
608     args.alignment = alignment;
609     args.initial_domain = initial_domains;
610     args.flags = 0;
611 
612     /* If VRAM is just stolen system memory, allow both VRAM and
613      * GTT, whichever has free space. If a buffer is evicted from
614      * VRAM to GTT, it will stay there.
615      */
616     if (!rws->info.has_dedicated_vram)
617         args.initial_domain |= RADEON_DOMAIN_GTT;
618 
619     if (flags & RADEON_FLAG_GTT_WC)
620         args.flags |= RADEON_GEM_GTT_WC;
621     if (flags & RADEON_FLAG_NO_CPU_ACCESS)
622         args.flags |= RADEON_GEM_NO_CPU_ACCESS;
623 
624     if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
625                             &args, sizeof(args))) {
626         fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
627         fprintf(stderr, "radeon:    size      : %u bytes\n", size);
628         fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
629         fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
630         fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
631         return NULL;
632     }
633 
634     assert(args.handle != 0);
635 
636     bo = CALLOC_STRUCT(radeon_bo);
637     if (!bo)
638         return NULL;
639 
640     pipe_reference_init(&bo->base.reference, 1);
641     bo->base.alignment = alignment;
642     bo->base.usage = usage;
643     bo->base.size = size;
644     bo->base.vtbl = &radeon_bo_vtbl;
645     bo->rws = rws;
646     bo->handle = args.handle;
647     bo->va = 0;
648     bo->initial_domain = initial_domains;
649     bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
650     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
651     pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
652                         pb_cache_bucket);
653 
654     if (rws->info.has_virtual_memory) {
655         struct drm_radeon_gem_va va;
656         unsigned va_gap_size;
657 
658         va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
659         bo->va = radeon_bomgr_find_va(rws, size + va_gap_size, alignment);
660 
661         va.handle = bo->handle;
662         va.vm_id = 0;
663         va.operation = RADEON_VA_MAP;
664         va.flags = RADEON_VM_PAGE_READABLE |
665                    RADEON_VM_PAGE_WRITEABLE |
666                    RADEON_VM_PAGE_SNOOPED;
667         va.offset = bo->va;
668         r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
669         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
670             fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
671             fprintf(stderr, "radeon:    size      : %d bytes\n", size);
672             fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
673             fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
674             fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
675             radeon_bo_destroy(&bo->base);
676             return NULL;
677         }
678         mtx_lock(&rws->bo_handles_mutex);
679         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
680             struct pb_buffer *b = &bo->base;
681             struct radeon_bo *old_bo =
682                 util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset);
683 
684             mtx_unlock(&rws->bo_handles_mutex);
685             pb_reference(&b, &old_bo->base);
686             return radeon_bo(b);
687         }
688 
689         util_hash_table_set(rws->bo_vas, (void*)(uintptr_t)bo->va, bo);
690         mtx_unlock(&rws->bo_handles_mutex);
691     }
692 
693     if (initial_domains & RADEON_DOMAIN_VRAM)
694         rws->allocated_vram += align(size, rws->info.gart_page_size);
695     else if (initial_domains & RADEON_DOMAIN_GTT)
696         rws->allocated_gtt += align(size, rws->info.gart_page_size);
697 
698     return bo;
699 }
700 
radeon_bo_can_reclaim(struct pb_buffer * _buf)701 bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
702 {
703    struct radeon_bo *bo = radeon_bo(_buf);
704 
705    if (radeon_bo_is_referenced_by_any_cs(bo))
706       return false;
707 
708    return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
709 }
710 
radeon_bo_can_reclaim_slab(void * priv,struct pb_slab_entry * entry)711 bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
712 {
713     struct radeon_bo *bo = NULL; /* fix container_of */
714     bo = container_of(entry, bo, u.slab.entry);
715 
716     return radeon_bo_can_reclaim(&bo->base);
717 }
718 
radeon_bo_slab_destroy(struct pb_buffer * _buf)719 static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
720 {
721     struct radeon_bo *bo = radeon_bo(_buf);
722 
723     assert(!bo->handle);
724 
725     pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
726 }
727 
728 static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
729     radeon_bo_slab_destroy
730     /* other functions are never called */
731 };
732 
radeon_bo_slab_alloc(void * priv,unsigned heap,unsigned entry_size,unsigned group_index)733 struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
734                                      unsigned entry_size,
735                                      unsigned group_index)
736 {
737     struct radeon_drm_winsys *ws = priv;
738     struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
739     enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
740     enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
741     unsigned base_hash;
742 
743     if (!slab)
744         return NULL;
745 
746     slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
747                                                      64 * 1024, 64 * 1024,
748                                                      domains, flags));
749     if (!slab->buffer)
750         goto fail;
751 
752     assert(slab->buffer->handle);
753 
754     slab->base.num_entries = slab->buffer->base.size / entry_size;
755     slab->base.num_free = slab->base.num_entries;
756     slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
757     if (!slab->entries)
758         goto fail_buffer;
759 
760     LIST_INITHEAD(&slab->base.free);
761 
762     base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
763 
764     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
765         struct radeon_bo *bo = &slab->entries[i];
766 
767         bo->base.alignment = entry_size;
768         bo->base.usage = slab->buffer->base.usage;
769         bo->base.size = entry_size;
770         bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
771         bo->rws = ws;
772         bo->va = slab->buffer->va + i * entry_size;
773         bo->initial_domain = domains;
774         bo->hash = base_hash + i;
775         bo->u.slab.entry.slab = &slab->base;
776         bo->u.slab.entry.group_index = group_index;
777         bo->u.slab.real = slab->buffer;
778 
779         LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
780     }
781 
782     return &slab->base;
783 
784 fail_buffer:
785     radeon_bo_reference(&slab->buffer, NULL);
786 fail:
787     FREE(slab);
788     return NULL;
789 }
790 
radeon_bo_slab_free(void * priv,struct pb_slab * pslab)791 void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
792 {
793     struct radeon_slab *slab = (struct radeon_slab *)pslab;
794 
795     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
796         struct radeon_bo *bo = &slab->entries[i];
797         for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
798             radeon_bo_reference(&bo->u.slab.fences[j], NULL);
799         FREE(bo->u.slab.fences);
800     }
801 
802     FREE(slab->entries);
803     radeon_bo_reference(&slab->buffer, NULL);
804     FREE(slab);
805 }
806 
/* Decode a tile-split field value into a size: codes 0..6 map to
 * 64, 128, ..., 4096 (64 << code). Any other code yields 1024.
 */
static unsigned eg_tile_split(unsigned tile_split)
{
    if (tile_split <= 6)
        return 64u << tile_split;

    /* Unknown encodings decode like code 4. */
    return 1024;
}
821 
/* Encode a tile-split size into its field value: 64, 128, ..., 4096 map to
 * codes 0..6. Any other size yields code 4 (the code for 1024).
 */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
    unsigned code;

    for (code = 0; code <= 6; ++code) {
        if (eg_tile_split == (64u << code))
            return code;
    }

    return 4;
}
835 
radeon_bo_get_metadata(struct pb_buffer * _buf,struct radeon_bo_metadata * md)836 static void radeon_bo_get_metadata(struct pb_buffer *_buf,
837 				   struct radeon_bo_metadata *md)
838 {
839     struct radeon_bo *bo = radeon_bo(_buf);
840     struct drm_radeon_gem_set_tiling args;
841 
842     assert(bo->handle && "must not be called for slab entries");
843 
844     memset(&args, 0, sizeof(args));
845 
846     args.handle = bo->handle;
847 
848     drmCommandWriteRead(bo->rws->fd,
849                         DRM_RADEON_GEM_GET_TILING,
850                         &args,
851                         sizeof(args));
852 
853     md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
854     md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
855     if (args.tiling_flags & RADEON_TILING_MICRO)
856         md->u.legacy.microtile = RADEON_LAYOUT_TILED;
857     else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
858         md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;
859 
860     if (args.tiling_flags & RADEON_TILING_MACRO)
861         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
862 
863     md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
864     md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
865     md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
866     md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
867     md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
868     md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
869 }
870 
radeon_bo_set_metadata(struct pb_buffer * _buf,struct radeon_bo_metadata * md)871 static void radeon_bo_set_metadata(struct pb_buffer *_buf,
872                                    struct radeon_bo_metadata *md)
873 {
874     struct radeon_bo *bo = radeon_bo(_buf);
875     struct drm_radeon_gem_set_tiling args;
876 
877     assert(bo->handle && "must not be called for slab entries");
878 
879     memset(&args, 0, sizeof(args));
880 
881     os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
882 
883     if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
884         args.tiling_flags |= RADEON_TILING_MICRO;
885     else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
886         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
887 
888     if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
889         args.tiling_flags |= RADEON_TILING_MACRO;
890 
891     args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
892         RADEON_TILING_EG_BANKW_SHIFT;
893     args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
894         RADEON_TILING_EG_BANKH_SHIFT;
895     if (md->u.legacy.tile_split) {
896 	args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
897 			      RADEON_TILING_EG_TILE_SPLIT_MASK) <<
898 	    RADEON_TILING_EG_TILE_SPLIT_SHIFT;
899     }
900     args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
901         RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
902 
903     if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
904         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
905 
906     args.handle = bo->handle;
907     args.pitch = md->u.legacy.stride;
908 
909     drmCommandWriteRead(bo->rws->fd,
910                         DRM_RADEON_GEM_SET_TILING,
911                         &args,
912                         sizeof(args));
913 }
914 
915 static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys * rws,uint64_t size,unsigned alignment,enum radeon_bo_domain domain,enum radeon_bo_flag flags)916 radeon_winsys_bo_create(struct radeon_winsys *rws,
917                         uint64_t size,
918                         unsigned alignment,
919                         enum radeon_bo_domain domain,
920                         enum radeon_bo_flag flags)
921 {
922     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
923     struct radeon_bo *bo;
924     unsigned usage = 0, pb_cache_bucket = 0;
925 
926     assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
927 
928     /* Only 32-bit sizes are supported. */
929     if (size > UINT_MAX)
930         return NULL;
931 
932     /* VRAM implies WC. This is not optional. */
933     if (domain & RADEON_DOMAIN_VRAM)
934         flags |= RADEON_FLAG_GTT_WC;
935     /* NO_CPU_ACCESS is valid with VRAM only. */
936     if (domain != RADEON_DOMAIN_VRAM)
937         flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
938 
939     /* Sub-allocate small buffers from slabs. */
940     if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
941         size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
942         ws->info.has_virtual_memory &&
943         alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
944         struct pb_slab_entry *entry;
945         int heap = radeon_get_heap_index(domain, flags);
946 
947         if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
948             goto no_slab;
949 
950         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
951         if (!entry) {
952             /* Clear the cache and try again. */
953             pb_cache_release_all_buffers(&ws->bo_cache);
954 
955             entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
956         }
957         if (!entry)
958             return NULL;
959 
960         bo = NULL;
961         bo = container_of(entry, bo, u.slab.entry);
962 
963         pipe_reference_init(&bo->base.reference, 1);
964 
965         return &bo->base;
966     }
967 no_slab:
968 
969     /* This flag is irrelevant for the cache. */
970     flags &= ~RADEON_FLAG_NO_SUBALLOC;
971 
972     /* Align size to page size. This is the minimum alignment for normal
973      * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
974      * like constant/uniform buffers, can benefit from better and more reuse.
975      */
976     size = align(size, ws->info.gart_page_size);
977     alignment = align(alignment, ws->info.gart_page_size);
978 
979     bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
980 
981     /* Shared resources don't use cached heaps. */
982     if (use_reusable_pool) {
983         int heap = radeon_get_heap_index(domain, flags);
984         assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
985         usage = 1 << heap; /* Only set one usage bit for each heap. */
986 
987         pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
988         assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
989 
990         bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
991                                                usage, pb_cache_bucket));
992         if (bo)
993             return &bo->base;
994     }
995 
996     bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
997                           pb_cache_bucket);
998     if (!bo) {
999         /* Clear the cache and try again. */
1000         if (ws->info.has_virtual_memory)
1001             pb_slabs_reclaim(&ws->bo_slabs);
1002         pb_cache_release_all_buffers(&ws->bo_cache);
1003         bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
1004                               pb_cache_bucket);
1005         if (!bo)
1006             return NULL;
1007     }
1008 
1009     bo->u.real.use_reusable_pool = use_reusable_pool;
1010 
1011     mtx_lock(&ws->bo_handles_mutex);
1012     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1013     mtx_unlock(&ws->bo_handles_mutex);
1014 
1015     return &bo->base;
1016 }
1017 
radeon_winsys_bo_from_ptr(struct radeon_winsys * rws,void * pointer,uint64_t size)1018 static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
1019                                                    void *pointer, uint64_t size)
1020 {
1021     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1022     struct drm_radeon_gem_userptr args;
1023     struct radeon_bo *bo;
1024     int r;
1025 
1026     bo = CALLOC_STRUCT(radeon_bo);
1027     if (!bo)
1028         return NULL;
1029 
1030     memset(&args, 0, sizeof(args));
1031     args.addr = (uintptr_t)pointer;
1032     args.size = align(size, ws->info.gart_page_size);
1033     args.flags = RADEON_GEM_USERPTR_ANONONLY |
1034         RADEON_GEM_USERPTR_VALIDATE |
1035         RADEON_GEM_USERPTR_REGISTER;
1036     if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
1037                             &args, sizeof(args))) {
1038         FREE(bo);
1039         return NULL;
1040     }
1041 
1042     assert(args.handle != 0);
1043 
1044     mtx_lock(&ws->bo_handles_mutex);
1045 
1046     /* Initialize it. */
1047     pipe_reference_init(&bo->base.reference, 1);
1048     bo->handle = args.handle;
1049     bo->base.alignment = 0;
1050     bo->base.size = size;
1051     bo->base.vtbl = &radeon_bo_vtbl;
1052     bo->rws = ws;
1053     bo->user_ptr = pointer;
1054     bo->va = 0;
1055     bo->initial_domain = RADEON_DOMAIN_GTT;
1056     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1057     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1058 
1059     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1060 
1061     mtx_unlock(&ws->bo_handles_mutex);
1062 
1063     if (ws->info.has_virtual_memory) {
1064         struct drm_radeon_gem_va va;
1065 
1066         bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
1067 
1068         va.handle = bo->handle;
1069         va.operation = RADEON_VA_MAP;
1070         va.vm_id = 0;
1071         va.offset = bo->va;
1072         va.flags = RADEON_VM_PAGE_READABLE |
1073                    RADEON_VM_PAGE_WRITEABLE |
1074                    RADEON_VM_PAGE_SNOOPED;
1075         va.offset = bo->va;
1076         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1077         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1078             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1079             radeon_bo_destroy(&bo->base);
1080             return NULL;
1081         }
1082         mtx_lock(&ws->bo_handles_mutex);
1083         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1084             struct pb_buffer *b = &bo->base;
1085             struct radeon_bo *old_bo =
1086                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1087 
1088             mtx_unlock(&ws->bo_handles_mutex);
1089             pb_reference(&b, &old_bo->base);
1090             return b;
1091         }
1092 
1093         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1094         mtx_unlock(&ws->bo_handles_mutex);
1095     }
1096 
1097     ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1098 
1099     return (struct pb_buffer*)bo;
1100 }
1101 
radeon_winsys_bo_from_handle(struct radeon_winsys * rws,struct winsys_handle * whandle,unsigned * stride,unsigned * offset)1102 static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
1103                                                       struct winsys_handle *whandle,
1104                                                       unsigned *stride,
1105                                                       unsigned *offset)
1106 {
1107     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1108     struct radeon_bo *bo;
1109     int r;
1110     unsigned handle;
1111     uint64_t size = 0;
1112 
1113     if (!offset && whandle->offset != 0) {
1114         fprintf(stderr, "attempt to import unsupported winsys offset %u\n",
1115                 whandle->offset);
1116         return NULL;
1117     }
1118 
1119     /* We must maintain a list of pairs <handle, bo>, so that we always return
1120      * the same BO for one particular handle. If we didn't do that and created
1121      * more than one BO for the same handle and then relocated them in a CS,
1122      * we would hit a deadlock in the kernel.
1123      *
1124      * The list of pairs is guarded by a mutex, of course. */
1125     mtx_lock(&ws->bo_handles_mutex);
1126 
1127     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1128         /* First check if there already is an existing bo for the handle. */
1129         bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
1130     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1131         /* We must first get the GEM handle, as fds are unreliable keys */
1132         r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
1133         if (r)
1134             goto fail;
1135         bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
1136     } else {
1137         /* Unknown handle type */
1138         goto fail;
1139     }
1140 
1141     if (bo) {
1142         /* Increase the refcount. */
1143         struct pb_buffer *b = NULL;
1144         pb_reference(&b, &bo->base);
1145         goto done;
1146     }
1147 
1148     /* There isn't, create a new one. */
1149     bo = CALLOC_STRUCT(radeon_bo);
1150     if (!bo) {
1151         goto fail;
1152     }
1153 
1154     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1155         struct drm_gem_open open_arg = {};
1156         memset(&open_arg, 0, sizeof(open_arg));
1157         /* Open the BO. */
1158         open_arg.name = whandle->handle;
1159         if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
1160             FREE(bo);
1161             goto fail;
1162         }
1163         handle = open_arg.handle;
1164         size = open_arg.size;
1165         bo->flink_name = whandle->handle;
1166     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1167         size = lseek(whandle->handle, 0, SEEK_END);
1168         /*
1169          * Could check errno to determine whether the kernel is new enough, but
1170          * it doesn't really matter why this failed, just that it failed.
1171          */
1172         if (size == (off_t)-1) {
1173             FREE(bo);
1174             goto fail;
1175         }
1176         lseek(whandle->handle, 0, SEEK_SET);
1177     }
1178 
1179     assert(handle != 0);
1180 
1181     bo->handle = handle;
1182 
1183     /* Initialize it. */
1184     pipe_reference_init(&bo->base.reference, 1);
1185     bo->base.alignment = 0;
1186     bo->base.size = (unsigned) size;
1187     bo->base.vtbl = &radeon_bo_vtbl;
1188     bo->rws = ws;
1189     bo->va = 0;
1190     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1191     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1192 
1193     if (bo->flink_name)
1194         util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1195 
1196     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1197 
1198 done:
1199     mtx_unlock(&ws->bo_handles_mutex);
1200 
1201     if (stride)
1202         *stride = whandle->stride;
1203     if (offset)
1204         *offset = whandle->offset;
1205 
1206     if (ws->info.has_virtual_memory && !bo->va) {
1207         struct drm_radeon_gem_va va;
1208 
1209         bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
1210 
1211         va.handle = bo->handle;
1212         va.operation = RADEON_VA_MAP;
1213         va.vm_id = 0;
1214         va.offset = bo->va;
1215         va.flags = RADEON_VM_PAGE_READABLE |
1216                    RADEON_VM_PAGE_WRITEABLE |
1217                    RADEON_VM_PAGE_SNOOPED;
1218         va.offset = bo->va;
1219         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1220         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1221             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1222             radeon_bo_destroy(&bo->base);
1223             return NULL;
1224         }
1225         mtx_lock(&ws->bo_handles_mutex);
1226         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1227             struct pb_buffer *b = &bo->base;
1228             struct radeon_bo *old_bo =
1229                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1230 
1231             mtx_unlock(&ws->bo_handles_mutex);
1232             pb_reference(&b, &old_bo->base);
1233             return b;
1234         }
1235 
1236         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1237         mtx_unlock(&ws->bo_handles_mutex);
1238     }
1239 
1240     bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
1241 
1242     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
1243         ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
1244     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
1245         ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1246 
1247     return (struct pb_buffer*)bo;
1248 
1249 fail:
1250     mtx_unlock(&ws->bo_handles_mutex);
1251     return NULL;
1252 }
1253 
radeon_winsys_bo_get_handle(struct pb_buffer * buffer,unsigned stride,unsigned offset,unsigned slice_size,struct winsys_handle * whandle)1254 static bool radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
1255                                         unsigned stride, unsigned offset,
1256                                         unsigned slice_size,
1257                                         struct winsys_handle *whandle)
1258 {
1259     struct drm_gem_flink flink;
1260     struct radeon_bo *bo = radeon_bo(buffer);
1261     struct radeon_drm_winsys *ws = bo->rws;
1262 
1263     /* Don't allow exports of slab entries. */
1264     if (!bo->handle)
1265         return false;
1266 
1267     memset(&flink, 0, sizeof(flink));
1268 
1269     bo->u.real.use_reusable_pool = false;
1270 
1271     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1272         if (!bo->flink_name) {
1273             flink.handle = bo->handle;
1274 
1275             if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
1276                 return false;
1277             }
1278 
1279             bo->flink_name = flink.name;
1280 
1281             mtx_lock(&ws->bo_handles_mutex);
1282             util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1283             mtx_unlock(&ws->bo_handles_mutex);
1284         }
1285         whandle->handle = bo->flink_name;
1286     } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
1287         whandle->handle = bo->handle;
1288     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1289         if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
1290             return false;
1291     }
1292 
1293     whandle->stride = stride;
1294     whandle->offset = offset;
1295     whandle->offset += slice_size * whandle->layer;
1296 
1297     return true;
1298 }
1299 
radeon_winsys_bo_is_user_ptr(struct pb_buffer * buf)1300 static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
1301 {
1302    return ((struct radeon_bo*)buf)->user_ptr != NULL;
1303 }
1304 
radeon_winsys_bo_is_suballocated(struct pb_buffer * buf)1305 static bool radeon_winsys_bo_is_suballocated(struct pb_buffer *buf)
1306 {
1307    return !((struct radeon_bo*)buf)->handle;
1308 }
1309 
radeon_winsys_bo_va(struct pb_buffer * buf)1310 static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
1311 {
1312     return ((struct radeon_bo*)buf)->va;
1313 }
1314 
radeon_winsys_bo_get_reloc_offset(struct pb_buffer * buf)1315 static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
1316 {
1317     struct radeon_bo *bo = radeon_bo(buf);
1318 
1319     if (bo->handle)
1320         return 0;
1321 
1322     return bo->va - bo->u.slab.real->va;
1323 }
1324 
radeon_drm_bo_init_functions(struct radeon_drm_winsys * ws)1325 void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
1326 {
1327     ws->base.buffer_set_metadata = radeon_bo_set_metadata;
1328     ws->base.buffer_get_metadata = radeon_bo_get_metadata;
1329     ws->base.buffer_map = radeon_bo_map;
1330     ws->base.buffer_unmap = radeon_bo_unmap;
1331     ws->base.buffer_wait = radeon_bo_wait;
1332     ws->base.buffer_create = radeon_winsys_bo_create;
1333     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
1334     ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
1335     ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
1336     ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
1337     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
1338     ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
1339     ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
1340     ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
1341 }
1342