1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef IRIS_BUFMGR_H
25 #define IRIS_BUFMGR_H
26 
27 #include <stdbool.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <sys/types.h>
31 #include "c11/threads.h"
32 #include "util/macros.h"
33 #include "util/u_atomic.h"
34 #include "util/list.h"
35 #include "pipe/p_defines.h"
36 
37 struct iris_batch;
38 struct gen_device_info;
39 struct pipe_debug_callback;
40 
/**
 * Memory zones.  When allocating a buffer, you can request that it is
 * placed into a specific region of the virtual address space (PPGTT).
 *
 * Most buffers can go anywhere (IRIS_MEMZONE_OTHER).  Some buffers are
 * accessed via an offset from a base address.  STATE_BASE_ADDRESS has
 * a maximum 4GB size for each region, so we need to restrict those
 * buffers to be within 4GB of the base.  Each memory zone corresponds
 * to a particular base address.
 *
 * We lay out the virtual address space as follows:
 *
 * - [0,   4K): Nothing            (empty page for null address)
 * - [4K,  4G): Shaders            (Instruction Base Address)
 * - [4G,  8G): Surfaces & Binders (Surface State Base Address, Bindless ...)
 * - [8G, 12G): Dynamic            (Dynamic State Base Address)
 * - [12G, *):  Other              (everything else in the full 48-bit VMA)
 *
 * A special buffer for border color lives at the start of the dynamic state
 * memory zone.  This unfortunately has to be handled specially because the
 * SAMPLER_STATE "Indirect State Pointer" field is only a 24-bit pointer.
 *
 * Each GL context uses a separate GEM context, which technically gives them
 * each a separate VMA.  However, we assign addresses globally, so buffers
 * will have the same address in all GEM contexts.  This lets us have a
 * single BO field for the address, which is easy and cheap.
 */
enum iris_memory_zone {
   /** Shaders; see IRIS_MEMZONE_SHADER_START. */
   IRIS_MEMZONE_SHADER,
   /** Binders; see IRIS_MEMZONE_BINDER_START. */
   IRIS_MEMZONE_BINDER,
   /** Surfaces; see IRIS_MEMZONE_SURFACE_START. */
   IRIS_MEMZONE_SURFACE,
   /** Dynamic state; see IRIS_MEMZONE_DYNAMIC_START. */
   IRIS_MEMZONE_DYNAMIC,
   /**
    * Everything else; see IRIS_MEMZONE_OTHER_START.
    *
    * This must stay the last multi-buffer zone: IRIS_MEMZONE_COUNT is
    * defined as IRIS_MEMZONE_OTHER + 1.
    */
   IRIS_MEMZONE_OTHER,

   /**
    * Single-buffer "zone" for the border color pool.  Deliberately placed
    * after IRIS_MEMZONE_OTHER so that IRIS_MEMZONE_COUNT excludes it.
    */
   IRIS_MEMZONE_BORDER_COLOR_POOL,
};
77 
/* Intentionally exclude single buffer "zones" */
#define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)

/* Size in bytes of one binder, and the number of binders reserved. */
#define IRIS_BINDER_SIZE (64 * 1024)
#define IRIS_MAX_BINDERS 100

/* Start addresses of the memory zones.  Shader, binder, dynamic and other
 * each begin on a 4GB (1ull << 32) boundary.  The surface zone shares the
 * second 4GB region with the binders and begins right after the
 * IRIS_MAX_BINDERS * IRIS_BINDER_SIZE bytes reserved for them; that product
 * is evaluated as unsigned long long so it cannot overflow int if the
 * limits are raised.
 */
#define IRIS_MEMZONE_SHADER_START     (0ull * (1ull << 32))
#define IRIS_MEMZONE_BINDER_START     (1ull * (1ull << 32))
#define IRIS_MEMZONE_SURFACE_START    (IRIS_MEMZONE_BINDER_START + 1ull * IRIS_MAX_BINDERS * IRIS_BINDER_SIZE)
#define IRIS_MEMZONE_DYNAMIC_START    (2ull * (1ull << 32))
#define IRIS_MEMZONE_OTHER_START      (3ull * (1ull << 32))

/* The border color pool sits at the very start of the dynamic zone. */
#define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024)
92 
/**
 * Classification of the various incoherent caches of the GPU into a number of
 * caching domains.
 *
 * The real domains are numbered contiguously from zero so they can be used
 * as array indices; NUM_IRIS_DOMAINS sizes iris_bo::last_seqnos.
 */
enum iris_domain {
   /** Render color cache. */
   IRIS_DOMAIN_RENDER_WRITE = 0,
   /** (Hi)Z/stencil cache. */
   IRIS_DOMAIN_DEPTH_WRITE,
   /** Any other read-write cache. */
   IRIS_DOMAIN_OTHER_WRITE,
   /** Any other read-only cache. */
   IRIS_DOMAIN_OTHER_READ,
   /** Number of caching domains. */
   NUM_IRIS_DOMAINS,
   /**
    * Not a real cache, use to opt out of the cache tracking mechanism.
    * Shares its value with NUM_IRIS_DOMAINS, so it is not a valid index
    * into a NUM_IRIS_DOMAINS-sized array.
    */
   IRIS_DOMAIN_NONE = NUM_IRIS_DOMAINS
};
111 
112 /**
113  * Whether a caching domain is guaranteed not to write any data to memory.
114  */
115 static inline bool
iris_domain_is_read_only(enum iris_domain access)116 iris_domain_is_read_only(enum iris_domain access)
117 {
118    return access == IRIS_DOMAIN_OTHER_READ;
119 }
120 
121 struct iris_bo {
122    /**
123     * Size in bytes of the buffer object.
124     *
125     * The size may be larger than the size originally requested for the
126     * allocation, such as being aligned to page size.
127     */
128    uint64_t size;
129 
130    /** Buffer manager context associated with this buffer object */
131    struct iris_bufmgr *bufmgr;
132 
133    /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
134    uint32_t hash;
135 
136    /** The GEM handle for this buffer object. */
137    uint32_t gem_handle;
138 
139    /**
140     * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
141     * Translation Table).
142     *
143     * Although each hardware context has its own VMA, we assign BO's to the
144     * same address in all contexts, for simplicity.
145     */
146    uint64_t gtt_offset;
147 
148    /**
149     * If non-zero, then this bo has an aux-map translation to this address.
150     */
151    uint64_t aux_map_address;
152 
153    /**
154     * The validation list index for this buffer, or -1 when not in a batch.
155     * Note that a single buffer may be in multiple batches (contexts), and
156     * this is a global field, which refers to the last batch using the BO.
157     * It should not be considered authoritative, but can be used to avoid a
158     * linear walk of the validation list in the common case by guessing that
159     * exec_bos[bo->index] == bo and confirming whether that's the case.
160     *
161     * XXX: this is not ideal now that we have more than one batch per context,
162     * XXX: as the index will flop back and forth between the render index and
163     * XXX: compute index...
164     */
165    unsigned index;
166 
167    int refcount;
168    const char *name;
169 
170    uint64_t kflags;
171 
172    /**
173     * Kenel-assigned global name for this object
174     *
175     * List contains both flink named and prime fd'd objects
176     */
177    unsigned global_name;
178 
179    /**
180     * Current tiling mode
181     */
182    uint32_t tiling_mode;
183    uint32_t stride;
184 
185    time_t free_time;
186 
187    /** Mapped address for the buffer, saved across map/unmap cycles */
188    void *map_cpu;
189    /** GTT virtual address for the buffer, saved across map/unmap cycles */
190    void *map_gtt;
191    /** WC CPU address for the buffer, saved across map/unmap cycles */
192    void *map_wc;
193 
194    /** BO cache list */
195    struct list_head head;
196 
197    /** List of GEM handle exports of this buffer (bo_export) */
198    struct list_head exports;
199 
200    /**
201     * Synchronization sequence number of most recent access of this BO from
202     * each caching domain.
203     *
204     * Although this is a global field, use in multiple contexts should be
205     * safe, see iris_emit_buffer_barrier_for() for details.
206     *
207     * Also align it to 64 bits. This will make atomic operations faster on 32
208     * bit platforms.
209     */
210    uint64_t last_seqnos[NUM_IRIS_DOMAINS] __attribute__ ((aligned (8)));
211 
212    /**
213     * Boolean of whether the GPU is definitely not accessing the buffer.
214     *
215     * This is only valid when reusable, since non-reusable
216     * buffers are those that have been shared with other
217     * processes, so we don't know their state.
218     */
219    bool idle;
220 
221    /**
222     * Boolean of whether this buffer can be re-used
223     */
224    bool reusable;
225 
226    /**
227     * Boolean of whether this buffer has been shared with an external client.
228     */
229    bool external;
230 
231    /**
232     * Boolean of whether this buffer is cache coherent
233     */
234    bool cache_coherent;
235 
236    /**
237     * Boolean of whether this buffer points into user memory
238     */
239    bool userptr;
240 };
241 
/* Allocation flag bits.
 * NOTE(review): presumably these are the values accepted by the 'flags'
 * argument of iris_bo_alloc_tiled() — confirm against the implementation.
 */
#define BO_ALLOC_ZEROED     (1<<0)
#define BO_ALLOC_COHERENT   (1<<1)

/**
 * Allocate a buffer object.
 *
 * Buffer objects are not necessarily initially mapped into CPU virtual
 * address space or graphics device aperture.  They must be mapped
 * using iris_bo_map() to be used by the CPU.
 *
 * \param bufmgr   Buffer manager to allocate from
 * \param name     Name for the buffer
 * \param size     Requested size in bytes
 * \param memzone  Memory zone to place the buffer in
 */
struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr,
                              const char *name,
                              uint64_t size,
                              enum iris_memory_zone memzone);

/**
 * Allocate a tiled buffer object.
 *
 * Alignment for tiled objects is set automatically; the 'flags'
 * argument provides a hint about how the object will be used initially.
 *
 * NOTE(review): the sentence above predates the explicit 'alignment'
 * parameter in the signature; how the two interact is not visible from
 * this header — confirm against the implementation.
 *
 * Valid tiling formats are:
 *  I915_TILING_NONE
 *  I915_TILING_X
 *  I915_TILING_Y
 */
struct iris_bo *iris_bo_alloc_tiled(struct iris_bufmgr *bufmgr,
                                    const char *name,
                                    uint64_t size,
                                    uint32_t alignment,
                                    enum iris_memory_zone memzone,
                                    uint32_t tiling_mode,
                                    uint32_t pitch,
                                    unsigned flags);

/**
 * Create a buffer object for caller-provided memory.
 *
 * NOTE(review): only the prototype is visible here; presumably the BO wraps
 * the 'size' bytes at 'ptr' (cf. the 'userptr' flag in struct iris_bo) —
 * confirm against the implementation.
 */
struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
                       void *ptr, size_t size,
                       enum iris_memory_zone memzone);
281 
282 /** Takes a reference on a buffer object */
283 static inline void
iris_bo_reference(struct iris_bo * bo)284 iris_bo_reference(struct iris_bo *bo)
285 {
286    p_atomic_inc(&bo->refcount);
287 }
288 
/**
 * Releases a reference on a buffer object, freeing the data if
 * no references remain.
 *
 * \param bo  Buffer object to release
 */
void iris_bo_unreference(struct iris_bo *bo);
294 
/* Mapping flags: aliases for the corresponding PIPE_MAP_* values. */
#define MAP_READ          PIPE_MAP_READ
#define MAP_WRITE         PIPE_MAP_WRITE
#define MAP_ASYNC         PIPE_MAP_UNSYNCHRONIZED
#define MAP_PERSISTENT    PIPE_MAP_PERSISTENT
#define MAP_COHERENT      PIPE_MAP_COHERENT
/* internal: the top eight bits (24..31) are reserved for driver-private flags */
#define MAP_INTERNAL_MASK (0xffu << 24)
#define MAP_RAW           (0x01 << 24)

/* Union of every flag bit defined above, public and internal. */
#define MAP_FLAGS         (MAP_READ | MAP_WRITE | MAP_ASYNC | \
                           MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)
306 
307 /**
308  * Maps the buffer into userspace.
309  *
310  * This function will block waiting for any existing execution on the
311  * buffer to complete, first.  The resulting mapping is returned.
312  */
313 MUST_CHECK void *iris_bo_map(struct pipe_debug_callback *dbg,
314                              struct iris_bo *bo, unsigned flags);
315 
/**
 * Counterpart to iris_bo_map().
 *
 * This is currently a no-op: it ignores \p bo and always returns 0.
 * (struct iris_bo documents its map_cpu/map_gtt/map_wc pointers as being
 * saved across map/unmap cycles.)
 *
 * \param bo  Buffer object (unused)
 * \return always 0
 */
static inline int
iris_bo_unmap(struct iris_bo *bo)
{
   (void)bo;
   return 0;
}
321 
/**
 * Waits for rendering to an object by the GPU to have completed.
 *
 * This is not required for any access to the BO by bo_map,
 * bo_subdata, etc.  It is merely a way for the driver to implement
 * glFinish.
 */
void iris_bo_wait_rendering(struct iris_bo *bo);

/**
 * Unref a buffer manager instance.
 */
void iris_bufmgr_unref(struct iris_bufmgr *bufmgr);

/**
 * Create a visible name for a buffer which can be used by other apps
 *
 * \param bo Buffer to create a name for
 * \param name Returned name
 */
int iris_bo_flink(struct iris_bo *bo, uint32_t *name);

/**
 * Make a BO externally accessible.
 *
 * \param bo Buffer to make external
 */
void iris_bo_make_external(struct iris_bo *bo);

/**
 * Returns 1 if mapping the buffer for write could cause the process
 * to block, due to the object being active in the GPU.
 */
int iris_bo_busy(struct iris_bo *bo);

/**
 * Specify the volatility of the buffer.
 * \param bo Buffer whose purgeable status is being set
 * \param madv The purgeable status
 *
 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure. If you subsequently require the buffer,
 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
 *
 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
 * marked as I915_MADV_DONTNEED.
 */
int iris_bo_madvise(struct iris_bo *bo, int madv);
371 
/* drm_bacon_bufmgr_gem.c
 * NOTE(review): confirm this file name is still current.
 */
struct iris_bufmgr *iris_bufmgr_get_for_fd(struct gen_device_info *devinfo, int fd,
                                           bool bo_reuse);
int iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr);

struct iris_bo *iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
                                             const char *name,
                                             unsigned handle);

void *iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr);

int iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns);

uint32_t iris_create_hw_context(struct iris_bufmgr *bufmgr);
uint32_t iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

/* Context priority levels, derived from the I915_CONTEXT_* priority
 * constants.  Those constants are not defined by any include visible in
 * this header, so whoever expands these macros must have them in scope
 * (presumably via the i915 DRM uAPI header — confirm).
 */
#define IRIS_CONTEXT_LOW_PRIORITY    ((I915_CONTEXT_MIN_USER_PRIORITY-1)/2)
#define IRIS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
#define IRIS_CONTEXT_HIGH_PRIORITY   ((I915_CONTEXT_MAX_USER_PRIORITY+1)/2)

int iris_hw_context_set_priority(struct iris_bufmgr *bufmgr,
                                 uint32_t ctx_id, int priority);

void iris_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

int iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd);
struct iris_bo *iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
                                      uint64_t modifier);
400 
/**
 * Exports a bo as a GEM handle into a given DRM file descriptor
 * \param bo Buffer to export
 * \param drm_fd File descriptor where the new handle is created
 * \param out_handle Pointer to store the new handle
 *
 * Returns 0 if the buffer was successfully exported, a non zero error code
 * otherwise.
 */
int iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                         uint32_t *out_handle);

uint32_t iris_bo_export_gem_handle(struct iris_bo *bo);

/* NOTE(review): presumably reads the register at 'offset' into '*out' and
 * returns 0 on success — only the prototype is visible here; confirm.
 */
int iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *out);

/* NOTE(review): presumably a wrapper around ioctl(2) — only the prototype is
 * visible here; confirm its retry and error conventions.
 */
int drm_ioctl(int fd, unsigned long request, void *arg);
418 
419 /**
420  * Returns the BO's address relative to the appropriate base address.
421  *
422  * All of our base addresses are programmed to the start of a 4GB region,
423  * so simply returning the bottom 32 bits of the BO address will give us
424  * the offset from whatever base address corresponds to that memory region.
425  */
426 static inline uint32_t
iris_bo_offset_from_base_address(struct iris_bo * bo)427 iris_bo_offset_from_base_address(struct iris_bo *bo)
428 {
429    /* This only works for buffers in the memory zones corresponding to a
430     * base address - the top, unbounded memory zone doesn't have a base.
431     */
432    assert(bo->gtt_offset < IRIS_MEMZONE_OTHER_START);
433    return bo->gtt_offset;
434 }
435 
436 /**
437  * Track access of a BO from the specified caching domain and sequence number.
438  *
439  * Can be used without locking.  Only the most recent access (i.e. highest
440  * seqno) is tracked.
441  */
442 static inline void
iris_bo_bump_seqno(struct iris_bo * bo,uint64_t seqno,enum iris_domain type)443 iris_bo_bump_seqno(struct iris_bo *bo, uint64_t seqno,
444                    enum iris_domain type)
445 {
446    uint64_t *const last_seqno = &bo->last_seqnos[type];
447    uint64_t tmp, prev_seqno = p_atomic_read(last_seqno);
448 
449    while (prev_seqno < seqno &&
450           prev_seqno != (tmp = p_atomic_cmpxchg(last_seqno, prev_seqno, seqno)))
451       prev_seqno = tmp;
452 }
453 
/**
 * Maps a virtual address to a memory zone.
 *
 * NOTE(review): only the prototype is visible here; presumably this returns
 * the zone whose address range contains \p address — confirm against the
 * implementation.
 */
enum iris_memory_zone iris_memzone_for_address(uint64_t address);
455 
456 #endif /* IRIS_BUFMGR_H */
457