1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/i915_drm.h"
34 
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #ifndef NDEBUG
40 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
41 #endif
42 #else
43 #define VG(x) ((void)0)
44 #endif
45 
46 #include "common/gen_clflush.h"
47 #include "common/gen_decoder.h"
48 #include "common/gen_gem.h"
49 #include "common/gen_l3_config.h"
50 #include "dev/gen_device_info.h"
51 #include "blorp/blorp.h"
52 #include "compiler/brw_compiler.h"
53 #include "util/bitset.h"
54 #include "util/macros.h"
55 #include "util/hash_table.h"
56 #include "util/list.h"
57 #include "util/sparse_array.h"
58 #include "util/u_atomic.h"
59 #include "util/u_vector.h"
60 #include "util/u_math.h"
61 #include "util/vma.h"
62 #include "util/xmlconfig.h"
63 #include "vk_alloc.h"
64 #include "vk_debug_report.h"
65 #include "vk_object.h"
66 
67 /* Pre-declarations needed for WSI entrypoints */
68 struct wl_surface;
69 struct wl_display;
70 typedef struct xcb_connection_t xcb_connection_t;
71 typedef uint32_t xcb_visualid_t;
72 typedef uint32_t xcb_window_t;
73 
74 struct anv_batch;
75 struct anv_buffer;
76 struct anv_buffer_view;
77 struct anv_image_view;
78 struct anv_instance;
79 
80 struct gen_aux_map_context;
81 struct gen_perf_config;
82 struct gen_perf_counter_pass;
83 struct gen_perf_query_result;
84 
85 #include <vulkan/vulkan.h>
86 #include <vulkan/vulkan_intel.h>
87 #include <vulkan/vk_icd.h>
88 
89 #include "anv_android.h"
90 #include "anv_entrypoints.h"
91 #include "anv_extensions.h"
92 #include "isl/isl.h"
93 
94 #include "dev/gen_debug.h"
95 #define MESA_LOG_TAG "MESA-INTEL"
96 #include "util/log.h"
97 #include "wsi_common.h"
98 
99 #define NSEC_PER_SEC 1000000000ull
100 
101 /* anv Virtual Memory Layout
102  * =========================
103  *
104  * When the anv driver is determining the virtual graphics addresses of memory
105  * objects itself using the softpin mechanism, the following memory ranges
106  * will be used.
107  *
108  * Three special considerations to notice:
109  *
110  * (1) the dynamic state pool is located within the same 4 GiB as the low
111  * heap. This is to work around a VF cache issue described in a comment in
112  * anv_physical_device_init_heaps.
113  *
114  * (2) the binding table pool is located at lower addresses than the surface
115  * state pool, within a 4 GiB range. This allows surface state base addresses
116  * to cover both binding tables (16 bit offsets) and surface states (32 bit
117  * offsets).
118  *
119  * (3) the last 4 GiB of the address space is withheld from the high
120  * heap. Various hardware units will read past the end of an object for
121  * various reasons. This healthy margin prevents reads from wrapping around
122  * 48-bit addresses.
123  */
124 #define LOW_HEAP_MIN_ADDRESS               0x000000001000ULL /* 4 KiB */
125 #define LOW_HEAP_MAX_ADDRESS               0x0000bfffffffULL
126 #define DYNAMIC_STATE_POOL_MIN_ADDRESS     0x0000c0000000ULL /* 3 GiB */
127 #define DYNAMIC_STATE_POOL_MAX_ADDRESS     0x0000ffffffffULL
128 #define BINDING_TABLE_POOL_MIN_ADDRESS     0x000100000000ULL /* 4 GiB */
129 #define BINDING_TABLE_POOL_MAX_ADDRESS     0x00013fffffffULL
130 #define SURFACE_STATE_POOL_MIN_ADDRESS     0x000140000000ULL /* 5 GiB */
131 #define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
132 #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
133 #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
134 #define CLIENT_VISIBLE_HEAP_MIN_ADDRESS    0x0001c0000000ULL /* 7 GiB */
135 #define CLIENT_VISIBLE_HEAP_MAX_ADDRESS    0x0002bfffffffULL
136 #define HIGH_HEAP_MIN_ADDRESS              0x0002c0000000ULL /* 11 GiB */
137 
138 #define LOW_HEAP_SIZE               \
139    (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
140 #define DYNAMIC_STATE_POOL_SIZE     \
141    (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
142 #define BINDING_TABLE_POOL_SIZE     \
143    (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
144 #define SURFACE_STATE_POOL_SIZE     \
145    (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
146 #define INSTRUCTION_STATE_POOL_SIZE \
147    (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
148 #define CLIENT_VISIBLE_HEAP_SIZE               \
149    (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
150 
151 /* Allowing different clear colors requires us to perform a depth resolve at
152  * the end of certain render passes. This is because while slow clears store
153  * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
154  * See the PRMs for examples describing when additional resolves would be
155  * necessary. To enable fast clears without requiring extra resolves, we set
156  * the clear value to a globally-defined one. We could allow different values
157  * if the user doesn't expect coherent data during or after a render passes
158  * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
159  * don't seem to exist yet. In almost all Vulkan applications tested thus far,
160  * 1.0f seems to be the only value used. The only application that doesn't set
161  * this value does so through the usage of an seemingly uninitialized clear
162  * value.
163  */
164 #define ANV_HZ_FC_VAL 1.0f
165 
166 #define MAX_VBS         28
167 #define MAX_XFB_BUFFERS  4
168 #define MAX_XFB_STREAMS  4
169 #define MAX_SETS         8
170 #define MAX_RTS          8
171 #define MAX_VIEWPORTS   16
172 #define MAX_SCISSORS    16
173 #define MAX_PUSH_CONSTANTS_SIZE 128
174 #define MAX_DYNAMIC_BUFFERS 16
175 #define MAX_IMAGES 64
176 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
177 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
178 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
179 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
180  * use 64 here to avoid cache issues. This could most likely bring it back to
181  * 32 if we had different virtual addresses for the different views on a given
182  * GEM object.
183  */
184 #define ANV_UBO_ALIGNMENT 64
185 #define ANV_SSBO_ALIGNMENT 4
186 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
187 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
188 
189 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
190  *
191  *    "The surface state model is used when a Binding Table Index (specified
192  *    in the message descriptor) of less than 240 is specified. In this model,
193  *    the Binding Table Index is used to index into the binding table, and the
194  *    binding table entry contains a pointer to the SURFACE_STATE."
195  *
196  * Binding table values above 240 are used for various things in the hardware
197  * such as stateless, stateless with incoherent cache, SLM, and bindless.
198  */
199 #define MAX_BINDING_TABLE_SIZE 240
200 
201 /* The kernel relocation API has a limitation of a 32-bit delta value
202  * applied to the address before it is written which, in spite of it being
203  * unsigned, is treated as signed .  Because of the way that this maps to
204  * the Vulkan API, we cannot handle an offset into a buffer that does not
205  * fit into a signed 32 bits.  The only mechanism we have for dealing with
206  * this at the moment is to limit all VkDeviceMemory objects to a maximum
207  * of 2GB each.  The Vulkan spec allows us to do this:
208  *
209  *    "Some platforms may have a limit on the maximum size of a single
210  *    allocation. For example, certain systems may fail to create
211  *    allocations with a size greater than or equal to 4GB. Such a limit is
212  *    implementation-dependent, and if such a failure occurs then the error
213  *    VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
214  *
215  * We don't use vk_error here because it's not an error so much as an
216  * indication to the application that the allocation is too large.
217  */
218 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)
219 
220 #define ANV_SVGS_VB_INDEX    MAX_VBS
221 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
222 
223 /* We reserve this MI ALU register for the purpose of handling predication.
224  * Other code which uses the MI ALU should leave it alone.
225  */
226 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
227 
228 /* We reserve this MI ALU register to pass around an offset computed from
229  * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
230  * Other code which uses the MI ALU should leave it alone.
231  */
232 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
233 
234 /* For gen12 we set the streamout buffers using 4 separate commands
235  * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
236  * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
237  * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
238  * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
239  * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
240  * 3DSTATE_SO_BUFFER_INDEX_0.
241  */
242 #define SO_BUFFER_INDEX_0_CMD 0x60
243 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
244 
245 static inline uint32_t
align_down_npot_u32(uint32_t v,uint32_t a)246 align_down_npot_u32(uint32_t v, uint32_t a)
247 {
248    return v - (v % a);
249 }
250 
251 static inline uint32_t
align_down_u32(uint32_t v,uint32_t a)252 align_down_u32(uint32_t v, uint32_t a)
253 {
254    assert(a != 0 && a == (a & -a));
255    return v & ~(a - 1);
256 }
257 
258 static inline uint32_t
align_u32(uint32_t v,uint32_t a)259 align_u32(uint32_t v, uint32_t a)
260 {
261    assert(a != 0 && a == (a & -a));
262    return align_down_u32(v + a - 1, a);
263 }
264 
265 static inline uint64_t
align_down_u64(uint64_t v,uint64_t a)266 align_down_u64(uint64_t v, uint64_t a)
267 {
268    assert(a != 0 && a == (a & -a));
269    return v & ~(a - 1);
270 }
271 
272 static inline uint64_t
align_u64(uint64_t v,uint64_t a)273 align_u64(uint64_t v, uint64_t a)
274 {
275    return align_down_u64(v + a - 1, a);
276 }
277 
278 static inline int32_t
align_i32(int32_t v,int32_t a)279 align_i32(int32_t v, int32_t a)
280 {
281    assert(a != 0 && a == (a & -a));
282    return (v + a - 1) & ~(a - 1);
283 }
284 
285 /** Alignment must be a power of 2. */
286 static inline bool
anv_is_aligned(uintmax_t n,uintmax_t a)287 anv_is_aligned(uintmax_t n, uintmax_t a)
288 {
289    assert(a == (a & -a));
290    return (n & (a - 1)) == 0;
291 }
292 
293 static inline uint32_t
anv_minify(uint32_t n,uint32_t levels)294 anv_minify(uint32_t n, uint32_t levels)
295 {
296    if (unlikely(n == 0))
297       return 0;
298    else
299       return MAX2(n >> levels, 1);
300 }
301 
302 static inline float
anv_clamp_f(float f,float min,float max)303 anv_clamp_f(float f, float min, float max)
304 {
305    assert(min < max);
306 
307    if (f > max)
308       return max;
309    else if (f < min)
310       return min;
311    else
312       return f;
313 }
314 
315 static inline bool
anv_clear_mask(uint32_t * inout_mask,uint32_t clear_mask)316 anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
317 {
318    if (*inout_mask & clear_mask) {
319       *inout_mask &= ~clear_mask;
320       return true;
321    } else {
322       return false;
323    }
324 }
325 
326 static inline union isl_color_value
vk_to_isl_color(VkClearColorValue color)327 vk_to_isl_color(VkClearColorValue color)
328 {
329    return (union isl_color_value) {
330       .u32 = {
331          color.uint32[0],
332          color.uint32[1],
333          color.uint32[2],
334          color.uint32[3],
335       },
336    };
337 }
338 
anv_unpack_ptr(uintptr_t ptr,int bits,int * flags)339 static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
340 {
341    uintptr_t mask = (1ull << bits) - 1;
342    *flags = ptr & mask;
343    return (void *) (ptr & ~mask);
344 }
345 
anv_pack_ptr(void * ptr,int bits,int flags)346 static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
347 {
348    uintptr_t value = (uintptr_t) ptr;
349    uintptr_t mask = (1ull << bits) - 1;
350    return value | (mask & flags);
351 }
352 
353 #define for_each_bit(b, dword)                          \
354    for (uint32_t __dword = (dword);                     \
355         (b) = __builtin_ffs(__dword) - 1, __dword;      \
356         __dword &= ~(1 << (b)))
357 
358 #define typed_memcpy(dest, src, count) ({ \
359    STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \
360    memcpy((dest), (src), (count) * sizeof(*(src))); \
361 })
362 
363 /* Mapping from anv object to VkDebugReportObjectTypeEXT. New types need
364  * to be added here in order to utilize mapping in debug/error/perf macros.
365  */
366 #define REPORT_OBJECT_TYPE(o)                                                      \
367    __builtin_choose_expr (                                                         \
368    __builtin_types_compatible_p (__typeof (o), struct anv_instance*),              \
369    VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT,                                       \
370    __builtin_choose_expr (                                                         \
371    __builtin_types_compatible_p (__typeof (o), struct anv_physical_device*),       \
372    VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT,                                \
373    __builtin_choose_expr (                                                         \
374    __builtin_types_compatible_p (__typeof (o), struct anv_device*),                \
375    VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,                                         \
376    __builtin_choose_expr (                                                         \
377    __builtin_types_compatible_p (__typeof (o), const struct anv_device*),          \
378    VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,                                         \
379    __builtin_choose_expr (                                                         \
380    __builtin_types_compatible_p (__typeof (o), struct anv_queue*),                 \
381    VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT,                                          \
382    __builtin_choose_expr (                                                         \
383    __builtin_types_compatible_p (__typeof (o), struct anv_semaphore*),             \
384    VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT,                                      \
385    __builtin_choose_expr (                                                         \
386    __builtin_types_compatible_p (__typeof (o), struct anv_cmd_buffer*),            \
387    VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT,                                 \
388    __builtin_choose_expr (                                                         \
389    __builtin_types_compatible_p (__typeof (o), struct anv_fence*),                 \
390    VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT,                                          \
391    __builtin_choose_expr (                                                         \
392    __builtin_types_compatible_p (__typeof (o), struct anv_device_memory*),         \
393    VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT,                                  \
394    __builtin_choose_expr (                                                         \
395    __builtin_types_compatible_p (__typeof (o), struct anv_buffer*),                \
396    VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT,                                         \
397    __builtin_choose_expr (                                                         \
398    __builtin_types_compatible_p (__typeof (o), struct anv_image*),                 \
399    VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT,                                          \
400    __builtin_choose_expr (                                                         \
401    __builtin_types_compatible_p (__typeof (o), const struct anv_image*),           \
402    VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT,                                          \
403    __builtin_choose_expr (                                                         \
404    __builtin_types_compatible_p (__typeof (o), struct anv_event*),                 \
405    VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT,                                          \
406    __builtin_choose_expr (                                                         \
407    __builtin_types_compatible_p (__typeof (o), struct anv_query_pool*),            \
408    VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT,                                     \
409    __builtin_choose_expr (                                                         \
410    __builtin_types_compatible_p (__typeof (o), struct anv_buffer_view*),           \
411    VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT,                                    \
412    __builtin_choose_expr (                                                         \
413    __builtin_types_compatible_p (__typeof (o), struct anv_image_view*),            \
414    VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT,                                     \
415    __builtin_choose_expr (                                                         \
416    __builtin_types_compatible_p (__typeof (o), struct anv_shader_module*),         \
417    VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,                                  \
418    __builtin_choose_expr (                                                         \
419    __builtin_types_compatible_p (__typeof (o), struct anv_pipeline_cache*),        \
420    VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,                                 \
421    __builtin_choose_expr (                                                         \
422    __builtin_types_compatible_p (__typeof (o), struct anv_pipeline_layout*),       \
423    VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT,                                \
424    __builtin_choose_expr (                                                         \
425    __builtin_types_compatible_p (__typeof (o), struct anv_render_pass*),           \
426    VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT,                                    \
427    __builtin_choose_expr (                                                         \
428    __builtin_types_compatible_p (__typeof (o), struct anv_pipeline*),              \
429    VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT,                                       \
430    __builtin_choose_expr (                                                         \
431    __builtin_types_compatible_p (__typeof (o), struct anv_descriptor_set_layout*), \
432    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT,                          \
433    __builtin_choose_expr (                                                         \
434    __builtin_types_compatible_p (__typeof (o), struct anv_sampler*),               \
435    VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT,                                        \
436    __builtin_choose_expr (                                                         \
437    __builtin_types_compatible_p (__typeof (o), struct anv_descriptor_pool*),       \
438    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT,                                \
439    __builtin_choose_expr (                                                         \
440    __builtin_types_compatible_p (__typeof (o), struct anv_descriptor_set*),        \
441    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT,                                 \
442    __builtin_choose_expr (                                                         \
443    __builtin_types_compatible_p (__typeof (o), struct anv_framebuffer*),           \
444    VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT,                                    \
445    __builtin_choose_expr (                                                         \
446    __builtin_types_compatible_p (__typeof (o), struct anv_cmd_pool*),              \
447    VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT,                                   \
448    __builtin_choose_expr (                                                         \
449    __builtin_types_compatible_p (__typeof (o), struct anv_surface*),               \
450    VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT,                                    \
451    __builtin_choose_expr (                                                         \
452    __builtin_types_compatible_p (__typeof (o), struct wsi_swapchain*),             \
453    VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT,                                  \
454    __builtin_choose_expr (                                                         \
455    __builtin_types_compatible_p (__typeof (o), struct vk_debug_callback*),         \
456    VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT,                      \
457    __builtin_choose_expr (                                                         \
458    __builtin_types_compatible_p (__typeof (o), void*),                             \
459    VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,                                        \
460    /* The void expression results in a compile-time error                          \
461       when assigning the result to something.  */                                  \
462    (void)0)))))))))))))))))))))))))))))))
463 
464 /* Whenever we generate an error, pass it through this function. Useful for
465  * debugging, where we can break on it. Only call at error site, not when
466  * propagating errors. Might be useful to plug in a stack trace here.
467  */
468 
469 VkResult __vk_errorv(struct anv_instance *instance, const void *object,
470                      VkDebugReportObjectTypeEXT type, VkResult error,
471                      const char *file, int line, const char *format,
472                      va_list args);
473 
474 VkResult __vk_errorf(struct anv_instance *instance, const void *object,
475                      VkDebugReportObjectTypeEXT type, VkResult error,
476                      const char *file, int line, const char *format, ...)
477    anv_printflike(7, 8);
478 
479 #ifdef DEBUG
480 #define vk_error(error) __vk_errorf(NULL, NULL,\
481                                     VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,\
482                                     error, __FILE__, __LINE__, NULL)
483 #define vk_errorfi(instance, obj, error, format, ...)\
484     __vk_errorf(instance, obj, REPORT_OBJECT_TYPE(obj), error,\
485                 __FILE__, __LINE__, format, ## __VA_ARGS__)
486 #define vk_errorf(device, obj, error, format, ...)\
487    vk_errorfi(anv_device_instance_or_null(device),\
488               obj, error, format, ## __VA_ARGS__)
489 #else
490 #define vk_error(error) error
491 #define vk_errorfi(instance, obj, error, format, ...) error
492 #define vk_errorf(device, obj, error, format, ...) error
493 #endif
494 
495 /**
496  * Warn on ignored extension structs.
497  *
498  * The Vulkan spec requires us to ignore unsupported or unknown structs in
499  * a pNext chain.  In debug mode, emitting warnings for ignored structs may
500  * help us discover structs that we should not have ignored.
501  *
502  *
503  * From the Vulkan 1.0.38 spec:
504  *
505  *    Any component of the implementation (the loader, any enabled layers,
506  *    and drivers) must skip over, without processing (other than reading the
507  *    sType and pNext members) any chained structures with sType values not
508  *    defined by extensions supported by that component.
509  */
510 #define anv_debug_ignored_stype(sType) \
511    mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
512 
513 void __anv_perf_warn(struct anv_device *device, const void *object,
514                      VkDebugReportObjectTypeEXT type, const char *file,
515                      int line, const char *format, ...)
516    anv_printflike(6, 7);
517 void anv_loge(const char *format, ...) anv_printflike(1, 2);
518 void anv_loge_v(const char *format, va_list va);
519 
520 /**
521  * Print a FINISHME message, including its source location.
522  */
523 #define anv_finishme(format, ...) \
524    do { \
525       static bool reported = false; \
526       if (!reported) { \
527          mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
528                     ##__VA_ARGS__); \
529          reported = true; \
530       } \
531    } while (0)
532 
533 /**
534  * Print a perf warning message.  Set INTEL_DEBUG=perf to see these.
535  */
536 #define anv_perf_warn(instance, obj, format, ...) \
537    do { \
538       static bool reported = false; \
539       if (!reported && (INTEL_DEBUG & DEBUG_PERF)) { \
540          __anv_perf_warn(instance, obj, REPORT_OBJECT_TYPE(obj), __FILE__, __LINE__,\
541                          format, ##__VA_ARGS__); \
542          reported = true; \
543       } \
544    } while (0)
545 
546 /* A non-fatal assert.  Useful for debugging. */
547 #ifdef DEBUG
548 #define anv_assert(x) ({ \
549    if (unlikely(!(x))) \
550       mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
551 })
552 #else
553 #define anv_assert(x)
554 #endif
555 
556 /* A multi-pointer allocator
557  *
558  * When copying data structures from the user (such as a render pass), it's
559  * common to need to allocate data for a bunch of different things.  Instead
560  * of doing several allocations and having to handle all of the error checking
561  * that entails, it can be easier to do a single allocation.  This struct
562  * helps facilitate that.  The intended usage looks like this:
563  *
564  *    ANV_MULTIALLOC(ma)
565  *    anv_multialloc_add(&ma, &main_ptr, 1);
566  *    anv_multialloc_add(&ma, &substruct1, substruct1Count);
567  *    anv_multialloc_add(&ma, &substruct2, substruct2Count);
568  *
569  *    if (!anv_multialloc_alloc(&ma, pAllocator, VK_ALLOCATION_SCOPE_FOO))
570  *       return vk_error(VK_ERROR_OUT_OF_HOST_MEORY);
571  */
572 struct anv_multialloc {
573     size_t size;
574     size_t align;
575 
576     uint32_t ptr_count;
577     void **ptrs[8];
578 };
579 
580 #define ANV_MULTIALLOC_INIT \
581    ((struct anv_multialloc) { 0, })
582 
583 #define ANV_MULTIALLOC(_name) \
584    struct anv_multialloc _name = ANV_MULTIALLOC_INIT
585 
586 __attribute__((always_inline))
587 static inline void
_anv_multialloc_add(struct anv_multialloc * ma,void ** ptr,size_t size,size_t align)588 _anv_multialloc_add(struct anv_multialloc *ma,
589                     void **ptr, size_t size, size_t align)
590 {
591    size_t offset = align_u64(ma->size, align);
592    ma->size = offset + size;
593    ma->align = MAX2(ma->align, align);
594 
595    /* Store the offset in the pointer. */
596    *ptr = (void *)(uintptr_t)offset;
597 
598    assert(ma->ptr_count < ARRAY_SIZE(ma->ptrs));
599    ma->ptrs[ma->ptr_count++] = ptr;
600 }
601 
602 #define anv_multialloc_add_size(_ma, _ptr, _size) \
603    _anv_multialloc_add((_ma), (void **)(_ptr), (_size), __alignof__(**(_ptr)))
604 
605 #define anv_multialloc_add(_ma, _ptr, _count) \
606    anv_multialloc_add_size(_ma, _ptr, (_count) * sizeof(**(_ptr)));
607 
608 __attribute__((always_inline))
609 static inline void *
anv_multialloc_alloc(struct anv_multialloc * ma,const VkAllocationCallbacks * alloc,VkSystemAllocationScope scope)610 anv_multialloc_alloc(struct anv_multialloc *ma,
611                      const VkAllocationCallbacks *alloc,
612                      VkSystemAllocationScope scope)
613 {
614    void *ptr = vk_alloc(alloc, ma->size, ma->align, scope);
615    if (!ptr)
616       return NULL;
617 
618    /* Fill out each of the pointers with their final value.
619     *
620     *   for (uint32_t i = 0; i < ma->ptr_count; i++)
621     *      *ma->ptrs[i] = ptr + (uintptr_t)*ma->ptrs[i];
622     *
623     * Unfortunately, even though ma->ptr_count is basically guaranteed to be a
624     * constant, GCC is incapable of figuring this out and unrolling the loop
625     * so we have to give it a little help.
626     */
627    STATIC_ASSERT(ARRAY_SIZE(ma->ptrs) == 8);
628 #define _ANV_MULTIALLOC_UPDATE_POINTER(_i) \
629    if ((_i) < ma->ptr_count) \
630       *ma->ptrs[_i] = ptr + (uintptr_t)*ma->ptrs[_i]
631    _ANV_MULTIALLOC_UPDATE_POINTER(0);
632    _ANV_MULTIALLOC_UPDATE_POINTER(1);
633    _ANV_MULTIALLOC_UPDATE_POINTER(2);
634    _ANV_MULTIALLOC_UPDATE_POINTER(3);
635    _ANV_MULTIALLOC_UPDATE_POINTER(4);
636    _ANV_MULTIALLOC_UPDATE_POINTER(5);
637    _ANV_MULTIALLOC_UPDATE_POINTER(6);
638    _ANV_MULTIALLOC_UPDATE_POINTER(7);
639 #undef _ANV_MULTIALLOC_UPDATE_POINTER
640 
641    return ptr;
642 }
643 
644 __attribute__((always_inline))
645 static inline void *
anv_multialloc_alloc2(struct anv_multialloc * ma,const VkAllocationCallbacks * parent_alloc,const VkAllocationCallbacks * alloc,VkSystemAllocationScope scope)646 anv_multialloc_alloc2(struct anv_multialloc *ma,
647                       const VkAllocationCallbacks *parent_alloc,
648                       const VkAllocationCallbacks *alloc,
649                       VkSystemAllocationScope scope)
650 {
651    return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
652 }
653 
654 struct anv_bo {
655    uint32_t gem_handle;
656 
657    uint32_t refcount;
658 
659    /* Index into the current validation list.  This is used by the
660     * validation list building alrogithm to track which buffers are already
661     * in the validation list so that we can ensure uniqueness.
662     */
663    uint32_t index;
664 
665    /* Index for use with util_sparse_array_free_list */
666    uint32_t free_index;
667 
668    /* Last known offset.  This value is provided by the kernel when we
669     * execbuf and is used as the presumed offset for the next bunch of
670     * relocations.
671     */
672    uint64_t offset;
673 
674    /** Size of the buffer not including implicit aux */
675    uint64_t size;
676 
677    /* Map for internally mapped BOs.
678     *
679     * If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
680     */
681    void *map;
682 
683    /** Size of the implicit CCS range at the end of the buffer
684     *
685     * On Gen12, CCS data is always a direct 1/256 scale-down.  A single 64K
686     * page of main surface data maps to a 256B chunk of CCS data and that
687     * mapping is provided on TGL-LP by the AUX table which maps virtual memory
688     * addresses in the main surface to virtual memory addresses for CCS data.
689     *
690     * Because we can't change these maps around easily and because Vulkan
691     * allows two VkImages to be bound to overlapping memory regions (as long
692     * as the app is careful), it's not feasible to make this mapping part of
693     * the image.  (On Gen11 and earlier, the mapping was provided via
694     * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
695     * Instead, we attach the CCS data directly to the buffer object and setup
696     * the AUX table mapping at BO creation time.
697     *
698     * This field is for internal tracking use by the BO allocator only and
699     * should not be touched by other parts of the code.  If something wants to
700     * know if a BO has implicit CCS data, it should instead look at the
701     * has_implicit_ccs boolean below.
702     *
703     * This data is not included in maps of this buffer.
704     */
705    uint32_t _ccs_size;
706 
707    /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
708    uint32_t flags;
709 
710    /** True if this BO may be shared with other processes */
711    bool is_external:1;
712 
713    /** True if this BO is a wrapper
714     *
715     * When set to true, none of the fields in this BO are meaningful except
716     * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
717     * See also anv_bo_unwrap().  Wrapper BOs are not allowed when use_softpin
718     * is set in the physical device.
719     */
720    bool is_wrapper:1;
721 
722    /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
723    bool has_fixed_address:1;
724 
725    /** True if this BO wraps a host pointer */
726    bool from_host_ptr:1;
727 
728    /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
729    bool has_client_visible_address:1;
730 
731    /** True if this BO has implicit CCS data attached to it */
732    bool has_implicit_ccs:1;
733 };
734 
735 static inline struct anv_bo *
anv_bo_ref(struct anv_bo * bo)736 anv_bo_ref(struct anv_bo *bo)
737 {
738    p_atomic_inc(&bo->refcount);
739    return bo;
740 }
741 
742 static inline struct anv_bo *
anv_bo_unwrap(struct anv_bo * bo)743 anv_bo_unwrap(struct anv_bo *bo)
744 {
745    while (bo->is_wrapper)
746       bo = bo->map;
747    return bo;
748 }
749 
750 /* Represents a lock-free linked list of "free" things.  This is used by
751  * both the block pool and the state pools.  Unfortunately, in order to
752  * solve the ABA problem, we can't use a single uint32_t head.
753  */
754 union anv_free_list {
755    struct {
756       uint32_t offset;
757 
758       /* A simple count that is incremented every time the head changes. */
759       uint32_t count;
760    };
761    /* Make sure it's aligned to 64 bits. This will make atomic operations
762     * faster on 32 bit platforms.
763     */
764    uint64_t u64 __attribute__ ((aligned (8)));
765 };
766 
767 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
768 
769 struct anv_block_state {
770    union {
771       struct {
772          uint32_t next;
773          uint32_t end;
774       };
775       /* Make sure it's aligned to 64 bits. This will make atomic operations
776        * faster on 32 bit platforms.
777        */
778       uint64_t u64 __attribute__ ((aligned (8)));
779    };
780 };
781 
782 #define anv_block_pool_foreach_bo(bo, pool)  \
783    for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
784         _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
785         _pp_bo++)
786 
787 #define ANV_MAX_BLOCK_POOL_BOS 20
788 
789 struct anv_block_pool {
790    struct anv_device *device;
791    bool use_softpin;
792 
793    /* Wrapper BO for use in relocation lists.  This BO is simply a wrapper
794     * around the actual BO so that we grow the pool after the wrapper BO has
795     * been put in a relocation list.  This is only used in the non-softpin
796     * case.
797     */
798    struct anv_bo wrapper_bo;
799 
800    struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
801    struct anv_bo *bo;
802    uint32_t nbos;
803 
804    uint64_t size;
805 
806    /* The address where the start of the pool is pinned. The various bos that
807     * are created as the pool grows will have addresses in the range
808     * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
809     */
810    uint64_t start_address;
811 
812    /* The offset from the start of the bo to the "center" of the block
813     * pool.  Pointers to allocated blocks are given by
814     * bo.map + center_bo_offset + offsets.
815     */
816    uint32_t center_bo_offset;
817 
818    /* Current memory map of the block pool.  This pointer may or may not
819     * point to the actual beginning of the block pool memory.  If
820     * anv_block_pool_alloc_back has ever been called, then this pointer
821     * will point to the "center" position of the buffer and all offsets
822     * (negative or positive) given out by the block pool alloc functions
823     * will be valid relative to this pointer.
824     *
825     * In particular, map == bo.map + center_offset
826     *
827     * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
828     * since it will handle the softpin case as well, where this points to NULL.
829     */
830    void *map;
831    int fd;
832 
833    /**
834     * Array of mmaps and gem handles owned by the block pool, reclaimed when
835     * the block pool is destroyed.
836     */
837    struct u_vector mmap_cleanups;
838 
839    struct anv_block_state state;
840 
841    struct anv_block_state back_state;
842 };
843 
844 /* Block pools are backed by a fixed-size 1GB memfd */
845 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
846 
847 /* The center of the block pool is also the middle of the memfd.  This may
848  * change in the future if we decide differently for some reason.
849  */
850 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
851 
852 static inline uint32_t
anv_block_pool_size(struct anv_block_pool * pool)853 anv_block_pool_size(struct anv_block_pool *pool)
854 {
855    return pool->state.end + pool->back_state.end;
856 }
857 
858 struct anv_state {
859    int32_t offset;
860    uint32_t alloc_size;
861    void *map;
862    uint32_t idx;
863 };
864 
865 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
866 
867 struct anv_fixed_size_state_pool {
868    union anv_free_list free_list;
869    struct anv_block_state block;
870 };
871 
872 #define ANV_MIN_STATE_SIZE_LOG2 6
873 #define ANV_MAX_STATE_SIZE_LOG2 21
874 
875 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
876 
877 struct anv_free_entry {
878    uint32_t next;
879    struct anv_state state;
880 };
881 
882 struct anv_state_table {
883    struct anv_device *device;
884    int fd;
885    struct anv_free_entry *map;
886    uint32_t size;
887    struct anv_block_state state;
888    struct u_vector cleanups;
889 };
890 
891 struct anv_state_pool {
892    struct anv_block_pool block_pool;
893 
894    /* Offset into the relevant state base address where the state pool starts
895     * allocating memory.
896     */
897    int32_t start_offset;
898 
899    struct anv_state_table table;
900 
901    /* The size of blocks which will be allocated from the block pool */
902    uint32_t block_size;
903 
904    /** Free list for "back" allocations */
905    union anv_free_list back_alloc_free_list;
906 
907    struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
908 };
909 
910 struct anv_state_reserved_pool {
911    struct anv_state_pool *pool;
912    union anv_free_list reserved_blocks;
913    uint32_t count;
914 };
915 
916 struct anv_state_stream {
917    struct anv_state_pool *state_pool;
918 
919    /* The size of blocks to allocate from the state pool */
920    uint32_t block_size;
921 
922    /* Current block we're allocating from */
923    struct anv_state block;
924 
925    /* Offset into the current block at which to allocate the next state */
926    uint32_t next;
927 
928    /* List of all blocks allocated from this pool */
929    struct util_dynarray all_blocks;
930 };
931 
932 /* The block_pool functions exported for testing only.  The block pool should
933  * only be used via a state pool (see below).
934  */
935 VkResult anv_block_pool_init(struct anv_block_pool *pool,
936                              struct anv_device *device,
937                              uint64_t start_address,
938                              uint32_t initial_size);
939 void anv_block_pool_finish(struct anv_block_pool *pool);
940 int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
941                              uint32_t block_size, uint32_t *padding);
942 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
943                                   uint32_t block_size);
944 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
945 size);
946 
947 VkResult anv_state_pool_init(struct anv_state_pool *pool,
948                              struct anv_device *device,
949                              uint64_t base_address,
950                              int32_t start_offset,
951                              uint32_t block_size);
952 void anv_state_pool_finish(struct anv_state_pool *pool);
953 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
954                                       uint32_t state_size, uint32_t alignment);
955 struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
956 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
957 void anv_state_stream_init(struct anv_state_stream *stream,
958                            struct anv_state_pool *state_pool,
959                            uint32_t block_size);
960 void anv_state_stream_finish(struct anv_state_stream *stream);
961 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
962                                         uint32_t size, uint32_t alignment);
963 
964 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
965                                       struct anv_state_pool *parent,
966                                       uint32_t count, uint32_t size,
967                                       uint32_t alignment);
968 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
969 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
970 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
971                                   struct anv_state state);
972 
973 VkResult anv_state_table_init(struct anv_state_table *table,
974                              struct anv_device *device,
975                              uint32_t initial_entries);
976 void anv_state_table_finish(struct anv_state_table *table);
977 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
978                              uint32_t count);
979 void anv_free_list_push(union anv_free_list *list,
980                         struct anv_state_table *table,
981                         uint32_t idx, uint32_t count);
982 struct anv_state* anv_free_list_pop(union anv_free_list *list,
983                                     struct anv_state_table *table);
984 
985 
986 static inline struct anv_state *
anv_state_table_get(struct anv_state_table * table,uint32_t idx)987 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
988 {
989    return &table->map[idx].state;
990 }
991 /**
992  * Implements a pool of re-usable BOs.  The interface is identical to that
993  * of block_pool except that each block is its own BO.
994  */
995 struct anv_bo_pool {
996    struct anv_device *device;
997 
998    struct util_sparse_array_free_list free_list[16];
999 };
1000 
1001 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device);
1002 void anv_bo_pool_finish(struct anv_bo_pool *pool);
1003 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
1004                            struct anv_bo **bo_out);
1005 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
1006 
1007 struct anv_scratch_pool {
1008    /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
1009    struct anv_bo *bos[16][MESA_SHADER_STAGES];
1010 };
1011 
1012 void anv_scratch_pool_init(struct anv_device *device,
1013                            struct anv_scratch_pool *pool);
1014 void anv_scratch_pool_finish(struct anv_device *device,
1015                              struct anv_scratch_pool *pool);
1016 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
1017                                       struct anv_scratch_pool *pool,
1018                                       gl_shader_stage stage,
1019                                       unsigned per_thread_scratch);
1020 
1021 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
1022 struct anv_bo_cache {
1023    struct util_sparse_array bo_map;
1024    pthread_mutex_t mutex;
1025 };
1026 
1027 VkResult anv_bo_cache_init(struct anv_bo_cache *cache);
1028 void anv_bo_cache_finish(struct anv_bo_cache *cache);
1029 
1030 struct anv_memory_type {
1031    /* Standard bits passed on to the client */
1032    VkMemoryPropertyFlags   propertyFlags;
1033    uint32_t                heapIndex;
1034 };
1035 
1036 struct anv_memory_heap {
1037    /* Standard bits passed on to the client */
1038    VkDeviceSize      size;
1039    VkMemoryHeapFlags flags;
1040 
1041    /** Driver-internal book-keeping.
1042     *
1043     * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
1044     */
1045    VkDeviceSize      used __attribute__ ((aligned (8)));
1046 };
1047 
1048 struct anv_physical_device {
1049     struct vk_object_base                       base;
1050 
1051     /* Link in anv_instance::physical_devices */
1052     struct list_head                            link;
1053 
1054     struct anv_instance *                       instance;
1055     bool                                        no_hw;
1056     char                                        path[20];
1057     const char *                                name;
1058     struct {
1059        uint16_t                                 domain;
1060        uint8_t                                  bus;
1061        uint8_t                                  device;
1062        uint8_t                                  function;
1063     }                                           pci_info;
1064     struct gen_device_info                      info;
1065     /** Amount of "GPU memory" we want to advertise
1066      *
1067      * Clearly, this value is bogus since Intel is a UMA architecture.  On
1068      * gen7 platforms, we are limited by GTT size unless we want to implement
1069      * fine-grained tracking and GTT splitting.  On Broadwell and above we are
1070      * practically unlimited.  However, we will never report more than 3/4 of
1071      * the total system ram to try and avoid running out of RAM.
1072      */
1073     bool                                        supports_48bit_addresses;
1074     struct brw_compiler *                       compiler;
1075     struct isl_device                           isl_dev;
1076     struct gen_perf_config *                    perf;
1077     int                                         cmd_parser_version;
1078     bool                                        has_softpin;
1079     bool                                        has_exec_async;
1080     bool                                        has_exec_capture;
1081     bool                                        has_exec_fence;
1082     bool                                        has_syncobj;
1083     bool                                        has_syncobj_wait;
1084     bool                                        has_syncobj_wait_available;
1085     bool                                        has_context_priority;
1086     bool                                        has_context_isolation;
1087     bool                                        has_thread_submit;
1088     bool                                        has_mem_available;
1089     bool                                        has_mmap_offset;
1090     uint64_t                                    gtt_size;
1091 
1092     bool                                        use_softpin;
1093     bool                                        always_use_bindless;
1094     bool                                        use_call_secondary;
1095 
1096     /** True if we can access buffers using A64 messages */
1097     bool                                        has_a64_buffer_access;
1098     /** True if we can use bindless access for images */
1099     bool                                        has_bindless_images;
1100     /** True if we can use bindless access for samplers */
1101     bool                                        has_bindless_samplers;
1102     /** True if we can use timeline semaphores through execbuf */
1103     bool                                        has_exec_timeline;
1104 
1105     /** True if we can read the GPU timestamp register
1106      *
1107      * When running in a virtual context, the timestamp register is unreadable
1108      * on Gen12+.
1109      */
1110     bool                                        has_reg_timestamp;
1111 
1112     /** True if this device has implicit AUX
1113      *
1114      * If true, CCS is handled as an implicit attachment to the BO rather than
1115      * as an explicitly bound surface.
1116      */
1117     bool                                        has_implicit_ccs;
1118 
1119     bool                                        always_flush_cache;
1120 
1121     struct anv_device_extension_table           supported_extensions;
1122 
1123     uint32_t                                    eu_total;
1124     uint32_t                                    subslice_total;
1125 
1126     struct {
1127       uint32_t                                  type_count;
1128       struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
1129       uint32_t                                  heap_count;
1130       struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
1131     } memory;
1132 
1133     uint8_t                                     driver_build_sha1[20];
1134     uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
1135     uint8_t                                     driver_uuid[VK_UUID_SIZE];
1136     uint8_t                                     device_uuid[VK_UUID_SIZE];
1137 
1138     struct disk_cache *                         disk_cache;
1139 
1140     struct wsi_device                       wsi_device;
1141     int                                         local_fd;
1142     int                                         master_fd;
1143 };
1144 
1145 struct anv_app_info {
1146    const char*        app_name;
1147    uint32_t           app_version;
1148    const char*        engine_name;
1149    uint32_t           engine_version;
1150    uint32_t           api_version;
1151 };
1152 
1153 struct anv_instance {
1154     struct vk_object_base                       base;
1155 
1156     VkAllocationCallbacks                       alloc;
1157 
1158     struct anv_app_info                         app_info;
1159 
1160     struct anv_instance_extension_table         enabled_extensions;
1161     struct anv_instance_dispatch_table          dispatch;
1162     struct anv_physical_device_dispatch_table   physical_device_dispatch;
1163     struct anv_device_dispatch_table            device_dispatch;
1164 
1165     bool                                        physical_devices_enumerated;
1166     struct list_head                            physical_devices;
1167 
1168     bool                                        pipeline_cache_enabled;
1169 
1170     struct vk_debug_report_instance             debug_report_callbacks;
1171 
1172     struct driOptionCache                       dri_options;
1173     struct driOptionCache                       available_dri_options;
1174 };
1175 
1176 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
1177 void anv_finish_wsi(struct anv_physical_device *physical_device);
1178 
1179 uint32_t anv_physical_device_api_version(struct anv_physical_device *dev);
1180 bool anv_physical_device_extension_supported(struct anv_physical_device *dev,
1181                                              const char *name);
1182 
1183 struct anv_queue_submit {
1184    struct anv_cmd_buffer *                   cmd_buffer;
1185 
1186    uint32_t                                  fence_count;
1187    uint32_t                                  fence_array_length;
1188    struct drm_i915_gem_exec_fence *          fences;
1189    uint64_t *                                fence_values;
1190 
1191    uint32_t                                  temporary_semaphore_count;
1192    uint32_t                                  temporary_semaphore_array_length;
1193    struct anv_semaphore_impl *               temporary_semaphores;
1194 
1195    /* Semaphores to be signaled with a SYNC_FD. */
1196    struct anv_semaphore **                   sync_fd_semaphores;
1197    uint32_t                                  sync_fd_semaphore_count;
1198    uint32_t                                  sync_fd_semaphore_array_length;
1199 
1200    /* Allocated only with non shareable timelines. */
1201    union {
1202       struct anv_timeline **                 wait_timelines;
1203       uint32_t *                             wait_timeline_syncobjs;
1204    };
1205    uint32_t                                  wait_timeline_count;
1206    uint32_t                                  wait_timeline_array_length;
1207    uint64_t *                                wait_timeline_values;
1208 
1209    struct anv_timeline **                    signal_timelines;
1210    uint32_t                                  signal_timeline_count;
1211    uint32_t                                  signal_timeline_array_length;
1212    uint64_t *                                signal_timeline_values;
1213 
1214    int                                       in_fence;
1215    bool                                      need_out_fence;
1216    int                                       out_fence;
1217 
1218    uint32_t                                  fence_bo_count;
1219    uint32_t                                  fence_bo_array_length;
1220    /* An array of struct anv_bo pointers with lower bit used as a flag to
1221     * signal we will wait on that BO (see anv_(un)pack_ptr).
1222     */
1223    uintptr_t *                               fence_bos;
1224 
1225    int                                       perf_query_pass;
1226 
1227    const VkAllocationCallbacks *             alloc;
1228    VkSystemAllocationScope                   alloc_scope;
1229 
1230    struct anv_bo *                           simple_bo;
1231    uint32_t                                  simple_bo_size;
1232 
1233    struct list_head                          link;
1234 };
1235 
1236 struct anv_queue {
1237     struct vk_object_base                       base;
1238 
1239    struct anv_device *                       device;
1240 
1241    VkDeviceQueueCreateFlags                  flags;
1242 
1243    /* Set once from the device api calls. */
1244    bool                                      lost_signaled;
1245 
1246    /* Only set once atomically by the queue */
1247    int                                       lost;
1248    int                                       error_line;
1249    const char *                              error_file;
1250    char                                      error_msg[80];
1251 
1252    /*
1253     * This mutext protects the variables below.
1254     */
1255    pthread_mutex_t                           mutex;
1256 
1257    pthread_t                                 thread;
1258    pthread_cond_t                            cond;
1259 
1260    /*
1261     * A list of struct anv_queue_submit to be submitted to i915.
1262     */
1263    struct list_head                          queued_submits;
1264 
1265    /* Set to true to stop the submission thread */
1266    bool                                      quit;
1267 };
1268 
1269 struct anv_pipeline_cache {
1270    struct vk_object_base                        base;
1271    struct anv_device *                          device;
1272    pthread_mutex_t                              mutex;
1273 
1274    struct hash_table *                          nir_cache;
1275 
1276    struct hash_table *                          cache;
1277 
1278    bool                                         external_sync;
1279 };
1280 
1281 struct nir_xfb_info;
1282 struct anv_pipeline_bind_map;
1283 
1284 void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
1285                              struct anv_device *device,
1286                              bool cache_enabled,
1287                              bool external_sync);
1288 void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
1289 
1290 struct anv_shader_bin *
1291 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
1292                           const void *key, uint32_t key_size);
1293 struct anv_shader_bin *
1294 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
1295                                  gl_shader_stage stage,
1296                                  const void *key_data, uint32_t key_size,
1297                                  const void *kernel_data, uint32_t kernel_size,
1298                                  const struct brw_stage_prog_data *prog_data,
1299                                  uint32_t prog_data_size,
1300                                  const struct brw_compile_stats *stats,
1301                                  uint32_t num_stats,
1302                                  const struct nir_xfb_info *xfb_info,
1303                                  const struct anv_pipeline_bind_map *bind_map);
1304 
1305 struct anv_shader_bin *
1306 anv_device_search_for_kernel(struct anv_device *device,
1307                              struct anv_pipeline_cache *cache,
1308                              const void *key_data, uint32_t key_size,
1309                              bool *user_cache_bit);
1310 
1311 struct anv_shader_bin *
1312 anv_device_upload_kernel(struct anv_device *device,
1313                          struct anv_pipeline_cache *cache,
1314                          gl_shader_stage stage,
1315                          const void *key_data, uint32_t key_size,
1316                          const void *kernel_data, uint32_t kernel_size,
1317                          const struct brw_stage_prog_data *prog_data,
1318                          uint32_t prog_data_size,
1319                          const struct brw_compile_stats *stats,
1320                          uint32_t num_stats,
1321                          const struct nir_xfb_info *xfb_info,
1322                          const struct anv_pipeline_bind_map *bind_map);
1323 
1324 struct nir_shader;
1325 struct nir_shader_compiler_options;
1326 
1327 struct nir_shader *
1328 anv_device_search_for_nir(struct anv_device *device,
1329                           struct anv_pipeline_cache *cache,
1330                           const struct nir_shader_compiler_options *nir_options,
1331                           unsigned char sha1_key[20],
1332                           void *mem_ctx);
1333 
1334 void
1335 anv_device_upload_nir(struct anv_device *device,
1336                       struct anv_pipeline_cache *cache,
1337                       const struct nir_shader *nir,
1338                       unsigned char sha1_key[20]);
1339 
1340 struct anv_address {
1341    struct anv_bo *bo;
1342    uint32_t offset;
1343 };
1344 
1345 struct anv_device {
1346     struct vk_device                            vk;
1347 
1348     struct anv_physical_device *                physical;
1349     bool                                        no_hw;
1350     struct gen_device_info                      info;
1351     struct isl_device                           isl_dev;
1352     int                                         context_id;
1353     int                                         fd;
1354     bool                                        can_chain_batches;
1355     bool                                        robust_buffer_access;
1356     bool                                        has_thread_submit;
1357     struct anv_device_extension_table           enabled_extensions;
1358     struct anv_device_dispatch_table            dispatch;
1359 
1360     pthread_mutex_t                             vma_mutex;
1361     struct util_vma_heap                        vma_lo;
1362     struct util_vma_heap                        vma_cva;
1363     struct util_vma_heap                        vma_hi;
1364 
1365     /** List of all anv_device_memory objects */
1366     struct list_head                            memory_objects;
1367 
1368     struct anv_bo_pool                          batch_bo_pool;
1369 
1370     struct anv_bo_cache                         bo_cache;
1371 
1372     struct anv_state_pool                       dynamic_state_pool;
1373     struct anv_state_pool                       instruction_state_pool;
1374     struct anv_state_pool                       binding_table_pool;
1375     struct anv_state_pool                       surface_state_pool;
1376 
1377     struct anv_state_reserved_pool              custom_border_colors;
1378 
1379     /** BO used for various workarounds
1380      *
1381      * There are a number of workarounds on our hardware which require writing
1382      * data somewhere and it doesn't really matter where.  For that, we use
1383      * this BO and just write to the first dword or so.
1384      *
1385      * We also need to be able to handle NULL buffers bound as pushed UBOs.
1386      * For that, we use the high bytes (>= 1024) of the workaround BO.
1387      */
1388     struct anv_bo *                             workaround_bo;
1389     struct anv_address                          workaround_address;
1390 
1391     struct anv_bo *                             trivial_batch_bo;
1392     struct anv_bo *                             hiz_clear_bo;
1393     struct anv_state                            null_surface_state;
1394 
1395     struct anv_pipeline_cache                   default_pipeline_cache;
1396     struct blorp_context                        blorp;
1397 
1398     struct anv_state                            border_colors;
1399 
1400     struct anv_state                            slice_hash;
1401 
1402     struct anv_queue                            queue;
1403 
1404     struct anv_scratch_pool                     scratch_pool;
1405 
1406     pthread_mutex_t                             mutex;
1407     pthread_cond_t                              queue_submit;
1408     int                                         _lost;
1409     int                                         lost_reported;
1410 
1411     struct gen_batch_decode_ctx                 decoder_ctx;
1412     /*
1413      * When decoding a anv_cmd_buffer, we might need to search for BOs through
1414      * the cmd_buffer's list.
1415      */
1416     struct anv_cmd_buffer                      *cmd_buffer_being_decoded;
1417 
1418     int                                         perf_fd; /* -1 if no opened */
1419     uint64_t                                    perf_metric; /* 0 if unset */
1420 
1421     struct gen_aux_map_context                  *aux_map_ctx;
1422 
1423     struct gen_debug_block_frame                *debug_frame_desc;
1424 };
1425 
1426 static inline struct anv_instance *
anv_device_instance_or_null(const struct anv_device * device)1427 anv_device_instance_or_null(const struct anv_device *device)
1428 {
1429    return device ? device->physical->instance : NULL;
1430 }
1431 
1432 static inline struct anv_state_pool *
anv_binding_table_pool(struct anv_device * device)1433 anv_binding_table_pool(struct anv_device *device)
1434 {
1435    if (device->physical->use_softpin)
1436       return &device->binding_table_pool;
1437    else
1438       return &device->surface_state_pool;
1439 }
1440 
1441 static inline struct anv_state
anv_binding_table_pool_alloc(struct anv_device * device)1442 anv_binding_table_pool_alloc(struct anv_device *device) {
1443    if (device->physical->use_softpin)
1444       return anv_state_pool_alloc(&device->binding_table_pool,
1445                                   device->binding_table_pool.block_size, 0);
1446    else
1447       return anv_state_pool_alloc_back(&device->surface_state_pool);
1448 }
1449 
1450 static inline void
anv_binding_table_pool_free(struct anv_device * device,struct anv_state state)1451 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
1452    anv_state_pool_free(anv_binding_table_pool(device), state);
1453 }
1454 
1455 static inline uint32_t
anv_mocs(const struct anv_device * device,const struct anv_bo * bo,isl_surf_usage_flags_t usage)1456 anv_mocs(const struct anv_device *device,
1457          const struct anv_bo *bo,
1458          isl_surf_usage_flags_t usage)
1459 {
1460    if (bo->is_external)
1461       return device->isl_dev.mocs.external;
1462 
1463    return isl_mocs(&device->isl_dev, usage);
1464 }
1465 
1466 void anv_device_init_blorp(struct anv_device *device);
1467 void anv_device_finish_blorp(struct anv_device *device);
1468 
1469 void _anv_device_report_lost(struct anv_device *device);
1470 VkResult _anv_device_set_lost(struct anv_device *device,
1471                               const char *file, int line,
1472                               const char *msg, ...)
1473    anv_printflike(4, 5);
1474 VkResult _anv_queue_set_lost(struct anv_queue *queue,
1475                               const char *file, int line,
1476                               const char *msg, ...)
1477    anv_printflike(4, 5);
1478 #define anv_device_set_lost(dev, ...) \
1479    _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
1480 #define anv_queue_set_lost(queue, ...) \
1481    (queue)->device->has_thread_submit ? \
1482    _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) : \
1483    _anv_device_set_lost(queue->device, __FILE__, __LINE__, __VA_ARGS__)
1484 
1485 static inline bool
anv_device_is_lost(struct anv_device * device)1486 anv_device_is_lost(struct anv_device *device)
1487 {
1488    int lost = p_atomic_read(&device->_lost);
1489    if (unlikely(lost && !device->lost_reported))
1490       _anv_device_report_lost(device);
1491    return lost;
1492 }
1493 
1494 VkResult anv_device_query_status(struct anv_device *device);
1495 
1496 
1497 enum anv_bo_alloc_flags {
1498    /** Specifies that the BO must have a 32-bit address
1499     *
1500     * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
1501     */
1502    ANV_BO_ALLOC_32BIT_ADDRESS =  (1 << 0),
1503 
1504    /** Specifies that the BO may be shared externally */
1505    ANV_BO_ALLOC_EXTERNAL =       (1 << 1),
1506 
1507    /** Specifies that the BO should be mapped */
1508    ANV_BO_ALLOC_MAPPED =         (1 << 2),
1509 
1510    /** Specifies that the BO should be snooped so we get coherency */
1511    ANV_BO_ALLOC_SNOOPED =        (1 << 3),
1512 
1513    /** Specifies that the BO should be captured in error states */
1514    ANV_BO_ALLOC_CAPTURE =        (1 << 4),
1515 
1516    /** Specifies that the BO will have an address assigned by the caller
1517     *
1518     * Such BOs do not exist in any VMA heap.
1519     */
1520    ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
1521 
1522    /** Enables implicit synchronization on the BO
1523     *
1524     * This is the opposite of EXEC_OBJECT_ASYNC.
1525     */
1526    ANV_BO_ALLOC_IMPLICIT_SYNC =  (1 << 6),
1527 
1528    /** Enables implicit synchronization on the BO
1529     *
1530     * This is equivalent to EXEC_OBJECT_WRITE.
1531     */
1532    ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
1533 
1534    /** Has an address which is visible to the client */
1535    ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
1536 
1537    /** This buffer has implicit CCS data attached to it */
1538    ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
1539 };
1540 
1541 VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size,
1542                              enum anv_bo_alloc_flags alloc_flags,
1543                              uint64_t explicit_address,
1544                              struct anv_bo **bo);
1545 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
1546                                             void *host_ptr, uint32_t size,
1547                                             enum anv_bo_alloc_flags alloc_flags,
1548                                             uint64_t client_address,
1549                                             struct anv_bo **bo_out);
1550 VkResult anv_device_import_bo(struct anv_device *device, int fd,
1551                               enum anv_bo_alloc_flags alloc_flags,
1552                               uint64_t client_address,
1553                               struct anv_bo **bo);
1554 VkResult anv_device_export_bo(struct anv_device *device,
1555                               struct anv_bo *bo, int *fd_out);
1556 void anv_device_release_bo(struct anv_device *device,
1557                            struct anv_bo *bo);
1558 
1559 static inline struct anv_bo *
anv_device_lookup_bo(struct anv_device * device,uint32_t gem_handle)1560 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
1561 {
1562    return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
1563 }
1564 
1565 VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo);
1566 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
1567                          int64_t timeout);
1568 
1569 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue);
1570 void anv_queue_finish(struct anv_queue *queue);
1571 
1572 VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit);
1573 VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
1574                                        struct anv_batch *batch);
1575 
1576 uint64_t anv_gettime_ns(void);
1577 uint64_t anv_get_absolute_timeout(uint64_t timeout);
1578 
1579 void* anv_gem_mmap(struct anv_device *device,
1580                    uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
1581 void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
1582 uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
1583 void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
1584 uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
1585 int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
1586 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
1587 int anv_gem_execbuffer(struct anv_device *device,
1588                        struct drm_i915_gem_execbuffer2 *execbuf);
1589 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
1590                        uint32_t stride, uint32_t tiling);
1591 int anv_gem_create_context(struct anv_device *device);
1592 bool anv_gem_has_context_priority(int fd);
1593 int anv_gem_destroy_context(struct anv_device *device, int context);
1594 int anv_gem_set_context_param(int fd, int context, uint32_t param,
1595                               uint64_t value);
1596 int anv_gem_get_context_param(int fd, int context, uint32_t param,
1597                               uint64_t *value);
1598 int anv_gem_get_param(int fd, uint32_t param);
1599 uint64_t anv_gem_get_drm_cap(int fd, uint32_t capability);
1600 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
1601 bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
1602 int anv_gem_gpu_get_reset_stats(struct anv_device *device,
1603                                 uint32_t *active, uint32_t *pending);
1604 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
1605 int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
1606 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
1607 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
1608 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
1609                        uint32_t read_domains, uint32_t write_domain);
1610 int anv_gem_sync_file_merge(struct anv_device *device, int fd1, int fd2);
1611 uint32_t anv_gem_syncobj_create(struct anv_device *device, uint32_t flags);
1612 void anv_gem_syncobj_destroy(struct anv_device *device, uint32_t handle);
1613 int anv_gem_syncobj_handle_to_fd(struct anv_device *device, uint32_t handle);
1614 uint32_t anv_gem_syncobj_fd_to_handle(struct anv_device *device, int fd);
1615 int anv_gem_syncobj_export_sync_file(struct anv_device *device,
1616                                      uint32_t handle);
1617 int anv_gem_syncobj_import_sync_file(struct anv_device *device,
1618                                      uint32_t handle, int fd);
1619 void anv_gem_syncobj_reset(struct anv_device *device, uint32_t handle);
1620 bool anv_gem_supports_syncobj_wait(int fd);
1621 int anv_gem_syncobj_wait(struct anv_device *device,
1622                          const uint32_t *handles, uint32_t num_handles,
1623                          int64_t abs_timeout_ns, bool wait_all);
1624 int anv_gem_syncobj_timeline_wait(struct anv_device *device,
1625                                   const uint32_t *handles, const uint64_t *points,
1626                                   uint32_t num_items, int64_t abs_timeout_ns,
1627                                   bool wait_all, bool wait_materialize);
1628 int anv_gem_syncobj_timeline_signal(struct anv_device *device,
1629                                     const uint32_t *handles, const uint64_t *points,
1630                                     uint32_t num_items);
1631 int anv_gem_syncobj_timeline_query(struct anv_device *device,
1632                                    const uint32_t *handles, uint64_t *points,
1633                                    uint32_t num_items);
1634 
1635 uint64_t anv_vma_alloc(struct anv_device *device,
1636                        uint64_t size, uint64_t align,
1637                        enum anv_bo_alloc_flags alloc_flags,
1638                        uint64_t client_address);
1639 void anv_vma_free(struct anv_device *device,
1640                   uint64_t address, uint64_t size);
1641 
1642 struct anv_reloc_list {
1643    uint32_t                                     num_relocs;
1644    uint32_t                                     array_length;
1645    struct drm_i915_gem_relocation_entry *       relocs;
1646    struct anv_bo **                             reloc_bos;
1647    uint32_t                                     dep_words;
1648    BITSET_WORD *                                deps;
1649 };
1650 
1651 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
1652                              const VkAllocationCallbacks *alloc);
1653 void anv_reloc_list_finish(struct anv_reloc_list *list,
1654                            const VkAllocationCallbacks *alloc);
1655 
1656 VkResult anv_reloc_list_add(struct anv_reloc_list *list,
1657                             const VkAllocationCallbacks *alloc,
1658                             uint32_t offset, struct anv_bo *target_bo,
1659                             uint32_t delta, uint64_t *address_u64_out);
1660 
1661 struct anv_batch_bo {
1662    /* Link in the anv_cmd_buffer.owned_batch_bos list */
1663    struct list_head                             link;
1664 
1665    struct anv_bo *                              bo;
1666 
1667    /* Bytes actually consumed in this batch BO */
1668    uint32_t                                     length;
1669 
1670    struct anv_reloc_list                        relocs;
1671 };
1672 
1673 struct anv_batch {
1674    const VkAllocationCallbacks *                alloc;
1675 
1676    struct anv_address                           start_addr;
1677 
1678    void *                                       start;
1679    void *                                       end;
1680    void *                                       next;
1681 
1682    struct anv_reloc_list *                      relocs;
1683 
1684    /* This callback is called (with the associated user data) in the event
1685     * that the batch runs out of space.
1686     */
1687    VkResult (*extend_cb)(struct anv_batch *, void *);
1688    void *                                       user_data;
1689 
1690    /**
1691     * Current error status of the command buffer. Used to track inconsistent
1692     * or incomplete command buffer states that are the consequence of run-time
1693     * errors such as out of memory scenarios. We want to track this in the
1694     * batch because the command buffer object is not visible to some parts
1695     * of the driver.
1696     */
1697    VkResult                                     status;
1698 };
1699 
1700 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
1701 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
1702 uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
1703                               void *location, struct anv_bo *bo, uint32_t offset);
1704 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
1705 
1706 static inline void
anv_batch_set_storage(struct anv_batch * batch,struct anv_address addr,void * map,size_t size)1707 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
1708                       void *map, size_t size)
1709 {
1710    batch->start_addr = addr;
1711    batch->next = batch->start = map;
1712    batch->end = map + size;
1713 }
1714 
1715 static inline VkResult
anv_batch_set_error(struct anv_batch * batch,VkResult error)1716 anv_batch_set_error(struct anv_batch *batch, VkResult error)
1717 {
1718    assert(error != VK_SUCCESS);
1719    if (batch->status == VK_SUCCESS)
1720       batch->status = error;
1721    return batch->status;
1722 }
1723 
1724 static inline bool
anv_batch_has_error(struct anv_batch * batch)1725 anv_batch_has_error(struct anv_batch *batch)
1726 {
1727    return batch->status != VK_SUCCESS;
1728 }
1729 
1730 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
1731 
1732 static inline bool
anv_address_is_null(struct anv_address addr)1733 anv_address_is_null(struct anv_address addr)
1734 {
1735    return addr.bo == NULL && addr.offset == 0;
1736 }
1737 
1738 static inline uint64_t
anv_address_physical(struct anv_address addr)1739 anv_address_physical(struct anv_address addr)
1740 {
1741    if (addr.bo && (addr.bo->flags & EXEC_OBJECT_PINNED))
1742       return gen_canonical_address(addr.bo->offset + addr.offset);
1743    else
1744       return gen_canonical_address(addr.offset);
1745 }
1746 
1747 static inline struct anv_address
anv_address_add(struct anv_address addr,uint64_t offset)1748 anv_address_add(struct anv_address addr, uint64_t offset)
1749 {
1750    addr.offset += offset;
1751    return addr;
1752 }
1753 
1754 static inline void
write_reloc(const struct anv_device * device,void * p,uint64_t v,bool flush)1755 write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
1756 {
1757    unsigned reloc_size = 0;
1758    if (device->info.gen >= 8) {
1759       reloc_size = sizeof(uint64_t);
1760       *(uint64_t *)p = gen_canonical_address(v);
1761    } else {
1762       reloc_size = sizeof(uint32_t);
1763       *(uint32_t *)p = v;
1764    }
1765 
1766    if (flush && !device->info.has_llc)
1767       gen_flush_range(p, reloc_size);
1768 }
1769 
1770 static inline uint64_t
_anv_combine_address(struct anv_batch * batch,void * location,const struct anv_address address,uint32_t delta)1771 _anv_combine_address(struct anv_batch *batch, void *location,
1772                      const struct anv_address address, uint32_t delta)
1773 {
1774    if (address.bo == NULL) {
1775       return address.offset + delta;
1776    } else {
1777       assert(batch->start <= location && location < batch->end);
1778 
1779       return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
1780    }
1781 }
1782 
1783 #define __gen_address_type struct anv_address
1784 #define __gen_user_data struct anv_batch
1785 #define __gen_combine_address _anv_combine_address
1786 
1787 /* Wrapper macros needed to work around preprocessor argument issues.  In
1788  * particular, arguments don't get pre-evaluated if they are concatenated.
1789  * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
1790  * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
1791  * We can work around this easily enough with these helpers.
1792  */
1793 #define __anv_cmd_length(cmd) cmd ## _length
1794 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
1795 #define __anv_cmd_header(cmd) cmd ## _header
1796 #define __anv_cmd_pack(cmd) cmd ## _pack
1797 #define __anv_reg_num(reg) reg ## _num
1798 
1799 #define anv_pack_struct(dst, struc, ...) do {                              \
1800       struct struc __template = {                                          \
1801          __VA_ARGS__                                                       \
1802       };                                                                   \
1803       __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
1804       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
1805    } while (0)
1806 
1807 #define anv_batch_emitn(batch, n, cmd, ...) ({             \
1808       void *__dst = anv_batch_emit_dwords(batch, n);       \
1809       if (__dst) {                                         \
1810          struct cmd __template = {                         \
1811             __anv_cmd_header(cmd),                         \
1812            .DWordLength = n - __anv_cmd_length_bias(cmd),  \
1813             __VA_ARGS__                                    \
1814          };                                                \
1815          __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
1816       }                                                    \
1817       __dst;                                               \
1818    })
1819 
1820 #define anv_batch_emit_merge(batch, dwords0, dwords1)                   \
1821    do {                                                                 \
1822       uint32_t *dw;                                                     \
1823                                                                         \
1824       STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1));        \
1825       dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
1826       if (!dw)                                                          \
1827          break;                                                         \
1828       for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
1829          dw[i] = (dwords0)[i] | (dwords1)[i];                           \
1830       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
1831    } while (0)
1832 
1833 #define anv_batch_emit(batch, cmd, name)                            \
1834    for (struct cmd name = { __anv_cmd_header(cmd) },                    \
1835         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
1836         __builtin_expect(_dst != NULL, 1);                              \
1837         ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
1838            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
1839            _dst = NULL;                                                 \
1840          }))
1841 
1842 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
1843 /* #define __gen_get_batch_address anv_batch_address */
1844 /* #define __gen_address_value anv_address_physical */
1845 /* #define __gen_address_offset anv_address_add */
1846 
1847 struct anv_device_memory {
1848    struct vk_object_base                        base;
1849 
1850    struct list_head                             link;
1851 
1852    struct anv_bo *                              bo;
1853    struct anv_memory_type *                     type;
1854    VkDeviceSize                                 map_size;
1855    void *                                       map;
1856 
1857    /* If set, we are holding reference to AHardwareBuffer
1858     * which we must release when memory is freed.
1859     */
1860    struct AHardwareBuffer *                     ahw;
1861 
1862    /* If set, this memory comes from a host pointer. */
1863    void *                                       host_ptr;
1864 };
1865 
1866 /**
1867  * Header for Vertex URB Entry (VUE)
1868  */
1869 struct anv_vue_header {
1870    uint32_t Reserved;
1871    uint32_t RTAIndex; /* RenderTargetArrayIndex */
1872    uint32_t ViewportIndex;
1873    float PointWidth;
1874 };
1875 
1876 /** Struct representing a sampled image descriptor
1877  *
1878  * This descriptor layout is used for sampled images, bare sampler, and
1879  * combined image/sampler descriptors.
1880  */
1881 struct anv_sampled_image_descriptor {
1882    /** Bindless image handle
1883     *
1884     * This is expected to already be shifted such that the 20-bit
1885     * SURFACE_STATE table index is in the top 20 bits.
1886     */
1887    uint32_t image;
1888 
1889    /** Bindless sampler handle
1890     *
1891     * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
1892     * to the dynamic state base address.
1893     */
1894    uint32_t sampler;
1895 };
1896 
1897 struct anv_texture_swizzle_descriptor {
1898    /** Texture swizzle
1899     *
1900     * See also nir_intrinsic_channel_select_intel
1901     */
1902    uint8_t swizzle[4];
1903 
1904    /** Unused padding to ensure the struct is a multiple of 64 bits */
1905    uint32_t _pad;
1906 };
1907 
1908 /** Struct representing a storage image descriptor */
1909 struct anv_storage_image_descriptor {
1910    /** Bindless image handles
1911     *
1912     * These are expected to already be shifted such that the 20-bit
1913     * SURFACE_STATE table index is in the top 20 bits.
1914     */
1915    uint32_t read_write;
1916    uint32_t write_only;
1917 };
1918 
1919 /** Struct representing a address/range descriptor
1920  *
1921  * The fields of this struct correspond directly to the data layout of
1922  * nir_address_format_64bit_bounded_global addresses.  The last field is the
1923  * offset in the NIR address so it must be zero so that when you load the
1924  * descriptor you get a pointer to the start of the range.
1925  */
1926 struct anv_address_range_descriptor {
1927    uint64_t address;
1928    uint32_t range;
1929    uint32_t zero;
1930 };
1931 
1932 enum anv_descriptor_data {
1933    /** The descriptor contains a BTI reference to a surface state */
1934    ANV_DESCRIPTOR_SURFACE_STATE  = (1 << 0),
1935    /** The descriptor contains a BTI reference to a sampler state */
1936    ANV_DESCRIPTOR_SAMPLER_STATE  = (1 << 1),
1937    /** The descriptor contains an actual buffer view */
1938    ANV_DESCRIPTOR_BUFFER_VIEW    = (1 << 2),
1939    /** The descriptor contains auxiliary image layout data */
1940    ANV_DESCRIPTOR_IMAGE_PARAM    = (1 << 3),
1941    /** The descriptor contains auxiliary image layout data */
1942    ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
1943    /** anv_address_range_descriptor with a buffer address and range */
1944    ANV_DESCRIPTOR_ADDRESS_RANGE  = (1 << 5),
1945    /** Bindless surface handle */
1946    ANV_DESCRIPTOR_SAMPLED_IMAGE  = (1 << 6),
1947    /** Storage image handles */
1948    ANV_DESCRIPTOR_STORAGE_IMAGE  = (1 << 7),
1949    /** Storage image handles */
1950    ANV_DESCRIPTOR_TEXTURE_SWIZZLE  = (1 << 8),
1951 };
1952 
1953 struct anv_descriptor_set_binding_layout {
1954    /* The type of the descriptors in this binding */
1955    VkDescriptorType type;
1956 
1957    /* Flags provided when this binding was created */
1958    VkDescriptorBindingFlagsEXT flags;
1959 
1960    /* Bitfield representing the type of data this descriptor contains */
1961    enum anv_descriptor_data data;
1962 
1963    /* Maximum number of YCbCr texture/sampler planes */
1964    uint8_t max_plane_count;
1965 
1966    /* Number of array elements in this binding (or size in bytes for inline
1967     * uniform data)
1968     */
1969    uint16_t array_size;
1970 
1971    /* Index into the flattend descriptor set */
1972    uint16_t descriptor_index;
1973 
1974    /* Index into the dynamic state array for a dynamic buffer */
1975    int16_t dynamic_offset_index;
1976 
1977    /* Index into the descriptor set buffer views */
1978    int16_t buffer_view_index;
1979 
1980    /* Offset into the descriptor buffer where this descriptor lives */
1981    uint32_t descriptor_offset;
1982 
1983    /* Immutable samplers (or NULL if no immutable samplers) */
1984    struct anv_sampler **immutable_samplers;
1985 };
1986 
1987 unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout);
1988 
1989 unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice,
1990                                   VkDescriptorType type);
1991 
1992 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
1993                                       const struct anv_descriptor_set_binding_layout *binding,
1994                                       bool sampler);
1995 
1996 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
1997                                       const struct anv_descriptor_set_binding_layout *binding,
1998                                       bool sampler);
1999 
2000 struct anv_descriptor_set_layout {
2001    struct vk_object_base base;
2002 
2003    /* Descriptor set layouts can be destroyed at almost any time */
2004    uint32_t ref_cnt;
2005 
2006    /* Number of bindings in this descriptor set */
2007    uint16_t binding_count;
2008 
2009    /* Total number of descriptors */
2010    uint16_t descriptor_count;
2011 
2012    /* Shader stages affected by this descriptor set */
2013    uint16_t shader_stages;
2014 
2015    /* Number of buffer views in this descriptor set */
2016    uint16_t buffer_view_count;
2017 
2018    /* Number of dynamic offsets used by this descriptor set */
2019    uint16_t dynamic_offset_count;
2020 
2021    /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
2022     * this buffer
2023     */
2024    VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
2025 
2026    /* Size of the descriptor buffer for this descriptor set */
2027    uint32_t descriptor_buffer_size;
2028 
2029    /* Bindings in this descriptor set */
2030    struct anv_descriptor_set_binding_layout binding[0];
2031 };
2032 
2033 void anv_descriptor_set_layout_destroy(struct anv_device *device,
2034                                        struct anv_descriptor_set_layout *layout);
2035 
2036 static inline void
anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout * layout)2037 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
2038 {
2039    assert(layout && layout->ref_cnt >= 1);
2040    p_atomic_inc(&layout->ref_cnt);
2041 }
2042 
2043 static inline void
anv_descriptor_set_layout_unref(struct anv_device * device,struct anv_descriptor_set_layout * layout)2044 anv_descriptor_set_layout_unref(struct anv_device *device,
2045                                 struct anv_descriptor_set_layout *layout)
2046 {
2047    assert(layout && layout->ref_cnt >= 1);
2048    if (p_atomic_dec_zero(&layout->ref_cnt))
2049       anv_descriptor_set_layout_destroy(device, layout);
2050 }
2051 
2052 struct anv_descriptor {
2053    VkDescriptorType type;
2054 
2055    union {
2056       struct {
2057          VkImageLayout layout;
2058          struct anv_image_view *image_view;
2059          struct anv_sampler *sampler;
2060       };
2061 
2062       struct {
2063          struct anv_buffer *buffer;
2064          uint64_t offset;
2065          uint64_t range;
2066       };
2067 
2068       struct anv_buffer_view *buffer_view;
2069    };
2070 };
2071 
2072 struct anv_descriptor_set {
2073    struct vk_object_base base;
2074 
2075    struct anv_descriptor_pool *pool;
2076    struct anv_descriptor_set_layout *layout;
2077 
2078    /* Amount of space occupied in the the pool by this descriptor set. It can
2079     * be larger than the size of the descriptor set.
2080     */
2081    uint32_t size;
2082 
2083    /* State relative to anv_descriptor_pool::bo */
2084    struct anv_state desc_mem;
2085    /* Surface state for the descriptor buffer */
2086    struct anv_state desc_surface_state;
2087 
2088    uint32_t buffer_view_count;
2089    struct anv_buffer_view *buffer_views;
2090 
2091    /* Link to descriptor pool's desc_sets list . */
2092    struct list_head pool_link;
2093 
2094    uint32_t descriptor_count;
2095    struct anv_descriptor descriptors[0];
2096 };
2097 
2098 struct anv_buffer_view {
2099    struct vk_object_base base;
2100 
2101    enum isl_format format; /**< VkBufferViewCreateInfo::format */
2102    uint64_t range; /**< VkBufferViewCreateInfo::range */
2103 
2104    struct anv_address address;
2105 
2106    struct anv_state surface_state;
2107    struct anv_state storage_surface_state;
2108    struct anv_state writeonly_storage_surface_state;
2109 
2110    struct brw_image_param storage_image_param;
2111 };
2112 
2113 struct anv_push_descriptor_set {
2114    struct anv_descriptor_set set;
2115 
2116    /* Put this field right behind anv_descriptor_set so it fills up the
2117     * descriptors[0] field. */
2118    struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2119 
2120    /** True if the descriptor set buffer has been referenced by a draw or
2121     * dispatch command.
2122     */
2123    bool set_used_on_gpu;
2124 
2125    struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2126 };
2127 
2128 struct anv_descriptor_pool {
2129    struct vk_object_base base;
2130 
2131    uint32_t size;
2132    uint32_t next;
2133    uint32_t free_list;
2134 
2135    struct anv_bo *bo;
2136    struct util_vma_heap bo_heap;
2137 
2138    struct anv_state_stream surface_state_stream;
2139    void *surface_state_free_list;
2140 
2141    struct list_head desc_sets;
2142 
2143    char data[0];
2144 };
2145 
2146 enum anv_descriptor_template_entry_type {
2147    ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_IMAGE,
2148    ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER,
2149    ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER_VIEW
2150 };
2151 
2152 struct anv_descriptor_template_entry {
2153    /* The type of descriptor in this entry */
2154    VkDescriptorType type;
2155 
2156    /* Binding in the descriptor set */
2157    uint32_t binding;
2158 
2159    /* Offset at which to write into the descriptor set binding */
2160    uint32_t array_element;
2161 
2162    /* Number of elements to write into the descriptor set binding */
2163    uint32_t array_count;
2164 
2165    /* Offset into the user provided data */
2166    size_t offset;
2167 
2168    /* Stride between elements into the user provided data */
2169    size_t stride;
2170 };
2171 
2172 struct anv_descriptor_update_template {
2173     struct vk_object_base base;
2174 
2175     VkPipelineBindPoint bind_point;
2176 
2177    /* The descriptor set this template corresponds to. This value is only
2178     * valid if the template was created with the templateType
2179     * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
2180     */
2181    uint8_t set;
2182 
2183    /* Number of entries in this template */
2184    uint32_t entry_count;
2185 
2186    /* Entries of the template */
2187    struct anv_descriptor_template_entry entries[0];
2188 };
2189 
2190 size_t
2191 anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout,
2192                                uint32_t var_desc_count);
2193 
2194 void
2195 anv_descriptor_set_write_image_view(struct anv_device *device,
2196                                     struct anv_descriptor_set *set,
2197                                     const VkDescriptorImageInfo * const info,
2198                                     VkDescriptorType type,
2199                                     uint32_t binding,
2200                                     uint32_t element);
2201 
2202 void
2203 anv_descriptor_set_write_buffer_view(struct anv_device *device,
2204                                      struct anv_descriptor_set *set,
2205                                      VkDescriptorType type,
2206                                      struct anv_buffer_view *buffer_view,
2207                                      uint32_t binding,
2208                                      uint32_t element);
2209 
2210 void
2211 anv_descriptor_set_write_buffer(struct anv_device *device,
2212                                 struct anv_descriptor_set *set,
2213                                 struct anv_state_stream *alloc_stream,
2214                                 VkDescriptorType type,
2215                                 struct anv_buffer *buffer,
2216                                 uint32_t binding,
2217                                 uint32_t element,
2218                                 VkDeviceSize offset,
2219                                 VkDeviceSize range);
2220 void
2221 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
2222                                              struct anv_descriptor_set *set,
2223                                              uint32_t binding,
2224                                              const void *data,
2225                                              size_t offset,
2226                                              size_t size);
2227 
2228 void
2229 anv_descriptor_set_write_template(struct anv_device *device,
2230                                   struct anv_descriptor_set *set,
2231                                   struct anv_state_stream *alloc_stream,
2232                                   const struct anv_descriptor_update_template *template,
2233                                   const void *data);
2234 
2235 VkResult
2236 anv_descriptor_set_create(struct anv_device *device,
2237                           struct anv_descriptor_pool *pool,
2238                           struct anv_descriptor_set_layout *layout,
2239                           uint32_t var_desc_count,
2240                           struct anv_descriptor_set **out_set);
2241 
2242 void
2243 anv_descriptor_set_destroy(struct anv_device *device,
2244                            struct anv_descriptor_pool *pool,
2245                            struct anv_descriptor_set *set);
2246 
2247 #define ANV_DESCRIPTOR_SET_NULL             (UINT8_MAX - 5)
2248 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS   (UINT8_MAX - 4)
2249 #define ANV_DESCRIPTOR_SET_DESCRIPTORS      (UINT8_MAX - 3)
2250 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS  (UINT8_MAX - 2)
2251 #define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
2252 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
2253 
2254 struct anv_pipeline_binding {
2255    /** Index in the descriptor set
2256     *
2257     * This is a flattened index; the descriptor set layout is already taken
2258     * into account.
2259     */
2260    uint32_t index;
2261 
2262    /** The descriptor set this surface corresponds to.
2263     *
2264     * The special ANV_DESCRIPTOR_SET_* values above indicates that this
2265     * binding is not a normal descriptor set but something else.
2266     */
2267    uint8_t set;
2268 
2269    union {
2270       /** Plane in the binding index for images */
2271       uint8_t plane;
2272 
2273       /** Input attachment index (relative to the subpass) */
2274       uint8_t input_attachment_index;
2275 
2276       /** Dynamic offset index (for dynamic UBOs and SSBOs) */
2277       uint8_t dynamic_offset_index;
2278    };
2279 
2280    /** For a storage image, whether it is write-only */
2281    uint8_t write_only;
2282 
2283    /** Pad to 64 bits so that there are no holes and we can safely memcmp
2284     * assuming POD zero-initialization.
2285     */
2286    uint8_t pad;
2287 };
2288 
2289 struct anv_push_range {
2290    /** Index in the descriptor set */
2291    uint32_t index;
2292 
2293    /** Descriptor set index */
2294    uint8_t set;
2295 
2296    /** Dynamic offset index (for dynamic UBOs) */
2297    uint8_t dynamic_offset_index;
2298 
2299    /** Start offset in units of 32B */
2300    uint8_t start;
2301 
2302    /** Range in units of 32B */
2303    uint8_t length;
2304 };
2305 
2306 struct anv_pipeline_layout {
2307    struct vk_object_base base;
2308 
2309    struct {
2310       struct anv_descriptor_set_layout *layout;
2311       uint32_t dynamic_offset_start;
2312    } set[MAX_SETS];
2313 
2314    uint32_t num_sets;
2315 
2316    unsigned char sha1[20];
2317 };
2318 
2319 struct anv_buffer {
2320    struct vk_object_base                        base;
2321 
2322    struct anv_device *                          device;
2323    VkDeviceSize                                 size;
2324 
2325    VkBufferUsageFlags                           usage;
2326 
2327    /* Set when bound */
2328    struct anv_address                           address;
2329 };
2330 
2331 static inline uint64_t
anv_buffer_get_range(struct anv_buffer * buffer,uint64_t offset,uint64_t range)2332 anv_buffer_get_range(struct anv_buffer *buffer, uint64_t offset, uint64_t range)
2333 {
2334    assert(offset <= buffer->size);
2335    if (range == VK_WHOLE_SIZE) {
2336       return buffer->size - offset;
2337    } else {
2338       assert(range + offset >= range);
2339       assert(range + offset <= buffer->size);
2340       return range;
2341    }
2342 }
2343 
2344 enum anv_cmd_dirty_bits {
2345    ANV_CMD_DIRTY_DYNAMIC_VIEWPORT                    = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
2346    ANV_CMD_DIRTY_DYNAMIC_SCISSOR                     = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
2347    ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH                  = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
2348    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS                  = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
2349    ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS             = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
2350    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS                = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
2351    ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK        = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
2352    ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK          = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
2353    ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE           = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
2354    ANV_CMD_DIRTY_PIPELINE                            = 1 << 9,
2355    ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 10,
2356    ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 11,
2357    ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 12,
2358    ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE                = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
2359    ANV_CMD_DIRTY_DYNAMIC_CULL_MODE                   = 1 << 14, /* VK_DYNAMIC_STATE_CULL_MODE_EXT */
2360    ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE                  = 1 << 15, /* VK_DYNAMIC_STATE_FRONT_FACE_EXT */
2361    ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY          = 1 << 16, /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT */
2362    ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1 << 17, /* VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT */
2363    ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE           = 1 << 18, /* VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT */
2364    ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE          = 1 << 19, /* VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT */
2365    ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP            = 1 << 20, /* VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT */
2366    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE    = 1 << 21, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT */
2367    ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE         = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
2368    ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP                  = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
2369 };
2370 typedef uint32_t anv_cmd_dirty_mask_t;
2371 
2372 #define ANV_CMD_DIRTY_DYNAMIC_ALL                       \
2373    (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |                    \
2374     ANV_CMD_DIRTY_DYNAMIC_SCISSOR |                     \
2375     ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |                  \
2376     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |                  \
2377     ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |             \
2378     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS |                \
2379     ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |        \
2380     ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |          \
2381     ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |           \
2382     ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE |                \
2383     ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |                   \
2384     ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |                  \
2385     ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY |          \
2386     ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | \
2387     ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |           \
2388     ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |          \
2389     ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |            \
2390     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |    \
2391     ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |         \
2392     ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
2393 
2394 static inline enum anv_cmd_dirty_bits
anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)2395 anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
2396 {
2397    switch (vk_state) {
2398    case VK_DYNAMIC_STATE_VIEWPORT:
2399    case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
2400       return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
2401    case VK_DYNAMIC_STATE_SCISSOR:
2402    case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
2403       return ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
2404    case VK_DYNAMIC_STATE_LINE_WIDTH:
2405       return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
2406    case VK_DYNAMIC_STATE_DEPTH_BIAS:
2407       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
2408    case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2409       return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
2410    case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2411       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
2412    case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2413       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
2414    case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2415       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
2416    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2417       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
2418    case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
2419       return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
2420    case VK_DYNAMIC_STATE_CULL_MODE_EXT:
2421       return ANV_CMD_DIRTY_DYNAMIC_CULL_MODE;
2422    case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
2423       return ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
2424    case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
2425       return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
2426    case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
2427       return ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
2428    case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
2429       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
2430    case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
2431       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
2432    case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
2433       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
2434    case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
2435       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
2436    case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
2437       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
2438    case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
2439       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
2440    default:
2441       assert(!"Unsupported dynamic state");
2442       return 0;
2443    }
2444 }
2445 
2446 
2447 enum anv_pipe_bits {
2448    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
2449    ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
2450    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
2451    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
2452    ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
2453    ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
2454    ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
2455    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
2456    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
2457    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
2458    ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),
2459    ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
2460    ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),
2461 
2462    /* This bit does not exist directly in PIPE_CONTROL.  Instead it means that
2463     * a flush has happened but not a CS stall.  The next time we do any sort
2464     * of invalidation we need to insert a CS stall at that time.  Otherwise,
2465     * we would have to CS stall on every flush which could be bad.
2466     */
2467    ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),
2468 
2469    /* This bit does not exist directly in PIPE_CONTROL. It means that render
2470     * target operations related to transfer commands with VkBuffer as
2471     * destination are ongoing. Some operations like copies on the command
2472     * streamer might need to be aware of this to trigger the appropriate stall
2473     * before they can proceed with the copy.
2474     */
2475    ANV_PIPE_RENDER_TARGET_BUFFER_WRITES      = (1 << 23),
2476 
2477    /* This bit does not exist directly in PIPE_CONTROL. It means that Gen12
2478     * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
2479     * done by writing the AUX-TT register.
2480     */
2481    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 24),
2482 
2483    /* This bit does not exist directly in PIPE_CONTROL. It means that a
2484     * PIPE_CONTROL with a post-sync operation will follow. This is used to
2485     * implement a workaround for Gen9.
2486     */
2487    ANV_PIPE_POST_SYNC_BIT                    = (1 << 25),
2488 };
2489 
2490 #define ANV_PIPE_FLUSH_BITS ( \
2491    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
2492    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
2493    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
2494    ANV_PIPE_TILE_CACHE_FLUSH_BIT)
2495 
2496 #define ANV_PIPE_STALL_BITS ( \
2497    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
2498    ANV_PIPE_DEPTH_STALL_BIT | \
2499    ANV_PIPE_CS_STALL_BIT)
2500 
2501 #define ANV_PIPE_INVALIDATE_BITS ( \
2502    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
2503    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
2504    ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
2505    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
2506    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
2507    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
2508    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
2509 
2510 static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device * device,VkAccessFlags flags)2511 anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
2512                                      VkAccessFlags flags)
2513 {
2514    enum anv_pipe_bits pipe_bits = 0;
2515 
2516    unsigned b;
2517    for_each_bit(b, flags) {
2518       switch ((VkAccessFlagBits)(1 << b)) {
2519       case VK_ACCESS_SHADER_WRITE_BIT:
2520          /* We're transitioning a buffer that was previously used as write
2521           * destination through the data port. To make its content available
2522           * to future operations, flush the data cache.
2523           */
2524          pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2525          break;
2526       case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
2527          /* We're transitioning a buffer that was previously used as render
2528           * target. To make its content available to future operations, flush
2529           * the render target cache.
2530           */
2531          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2532          break;
2533       case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
2534          /* We're transitioning a buffer that was previously used as depth
2535           * buffer. To make its content available to future operations, flush
2536           * the depth cache.
2537           */
2538          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2539          break;
2540       case VK_ACCESS_TRANSFER_WRITE_BIT:
2541          /* We're transitioning a buffer that was previously used as a
2542           * transfer write destination. Generic write operations include color
2543           * & depth operations as well as buffer operations like :
2544           *     - vkCmdClearColorImage()
2545           *     - vkCmdClearDepthStencilImage()
2546           *     - vkCmdBlitImage()
2547           *     - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
2548           *
2549           * Most of these operations are implemented using Blorp which writes
2550           * through the render target, so flush that cache to make it visible
2551           * to future operations. And for depth related operations we also
2552           * need to flush the depth cache.
2553           */
2554          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2555          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2556          break;
2557       case VK_ACCESS_MEMORY_WRITE_BIT:
2558          /* We're transitioning a buffer for generic write operations. Flush
2559           * all the caches.
2560           */
2561          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2562          break;
2563       default:
2564          break; /* Nothing to do */
2565       }
2566    }
2567 
2568    return pipe_bits;
2569 }
2570 
2571 static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device * device,VkAccessFlags flags)2572 anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
2573                                           VkAccessFlags flags)
2574 {
2575    enum anv_pipe_bits pipe_bits = 0;
2576 
2577    unsigned b;
2578    for_each_bit(b, flags) {
2579       switch ((VkAccessFlagBits)(1 << b)) {
2580       case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
2581          /* Indirect draw commands take a buffer as input that we're going to
2582           * read from the command streamer to load some of the HW registers
2583           * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
2584           * command streamer stall so that all the cache flushes have
2585           * completed before the command streamer loads from memory.
2586           */
2587          pipe_bits |=  ANV_PIPE_CS_STALL_BIT;
2588          /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
2589           * through a vertex buffer, so invalidate that cache.
2590           */
2591          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2592          /* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a
2593           * UBO from the buffer, so we need to invalidate constant cache.
2594           */
2595          pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2596          break;
2597       case VK_ACCESS_INDEX_READ_BIT:
2598       case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
2599          /* We transitioning a buffer to be used for as input for vkCmdDraw*
2600           * commands, so we invalidate the VF cache to make sure there is no
2601           * stale data when we start rendering.
2602           */
2603          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2604          break;
2605       case VK_ACCESS_UNIFORM_READ_BIT:
2606          /* We transitioning a buffer to be used as uniform data. Because
2607           * uniform is accessed through the data port & sampler, we need to
2608           * invalidate the texture cache (sampler) & constant cache (data
2609           * port) to avoid stale data.
2610           */
2611          pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2612          if (device->physical->compiler->indirect_ubos_use_sampler)
2613             pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2614          else
2615             pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2616          break;
2617       case VK_ACCESS_SHADER_READ_BIT:
2618       case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
2619       case VK_ACCESS_TRANSFER_READ_BIT:
2620          /* Transitioning a buffer to be read through the sampler, so
2621           * invalidate the texture cache, we don't want any stale data.
2622           */
2623          pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2624          break;
2625       case VK_ACCESS_MEMORY_READ_BIT:
2626          /* Transitioning a buffer for generic read, invalidate all the
2627           * caches.
2628           */
2629          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2630          break;
2631       case VK_ACCESS_MEMORY_WRITE_BIT:
2632          /* Generic write, make sure all previously written things land in
2633           * memory.
2634           */
2635          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2636          break;
2637       case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
2638          /* Transitioning a buffer for conditional rendering. We'll load the
2639           * content of this buffer into HW registers using the command
2640           * streamer, so we need to stall the command streamer to make sure
2641           * any in-flight flush operations have completed.
2642           */
2643          pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2644          break;
2645       default:
2646          break; /* Nothing to do */
2647       }
2648    }
2649 
2650    return pipe_bits;
2651 }
2652 
2653 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV (         \
2654    VK_IMAGE_ASPECT_COLOR_BIT | \
2655    VK_IMAGE_ASPECT_PLANE_0_BIT | \
2656    VK_IMAGE_ASPECT_PLANE_1_BIT | \
2657    VK_IMAGE_ASPECT_PLANE_2_BIT)
2658 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
2659    VK_IMAGE_ASPECT_PLANE_0_BIT | \
2660    VK_IMAGE_ASPECT_PLANE_1_BIT | \
2661    VK_IMAGE_ASPECT_PLANE_2_BIT)
2662 
2663 struct anv_vertex_binding {
2664    struct anv_buffer *                          buffer;
2665    VkDeviceSize                                 offset;
2666    VkDeviceSize                                 stride;
2667    VkDeviceSize                                 size;
2668 };
2669 
2670 struct anv_xfb_binding {
2671    struct anv_buffer *                          buffer;
2672    VkDeviceSize                                 offset;
2673    VkDeviceSize                                 size;
2674 };
2675 
2676 struct anv_push_constants {
2677    /** Push constant data provided by the client through vkPushConstants */
2678    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
2679 
2680    /** Dynamic offsets for dynamic UBOs and SSBOs */
2681    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
2682 
2683    /* Robust access pushed registers. */
2684    uint64_t push_reg_mask[MESA_SHADER_STAGES];
2685 
2686    /** Pad out to a multiple of 32 bytes */
2687    uint32_t pad[2];
2688 
2689    struct {
2690       /** Base workgroup ID
2691        *
2692        * Used for vkCmdDispatchBase.
2693        */
2694       uint32_t base_work_group_id[3];
2695 
2696       /** Subgroup ID
2697        *
2698        * This is never set by software but is implicitly filled out when
2699        * uploading the push constants for compute shaders.
2700        */
2701       uint32_t subgroup_id;
2702    } cs;
2703 };
2704 
2705 struct anv_dynamic_state {
2706    struct {
2707       uint32_t                                  count;
2708       VkViewport                                viewports[MAX_VIEWPORTS];
2709    } viewport;
2710 
2711    struct {
2712       uint32_t                                  count;
2713       VkRect2D                                  scissors[MAX_SCISSORS];
2714    } scissor;
2715 
2716    float                                        line_width;
2717 
2718    struct {
2719       float                                     bias;
2720       float                                     clamp;
2721       float                                     slope;
2722    } depth_bias;
2723 
2724    float                                        blend_constants[4];
2725 
2726    struct {
2727       float                                     min;
2728       float                                     max;
2729    } depth_bounds;
2730 
2731    struct {
2732       uint32_t                                  front;
2733       uint32_t                                  back;
2734    } stencil_compare_mask;
2735 
2736    struct {
2737       uint32_t                                  front;
2738       uint32_t                                  back;
2739    } stencil_write_mask;
2740 
2741    struct {
2742       uint32_t                                  front;
2743       uint32_t                                  back;
2744    } stencil_reference;
2745 
2746    struct {
2747       struct {
2748          VkStencilOp fail_op;
2749          VkStencilOp pass_op;
2750          VkStencilOp depth_fail_op;
2751          VkCompareOp compare_op;
2752       } front;
2753       struct {
2754          VkStencilOp fail_op;
2755          VkStencilOp pass_op;
2756          VkStencilOp depth_fail_op;
2757          VkCompareOp compare_op;
2758       } back;
2759    } stencil_op;
2760 
2761    struct {
2762       uint32_t                                  factor;
2763       uint16_t                                  pattern;
2764    } line_stipple;
2765 
2766    VkCullModeFlags                              cull_mode;
2767    VkFrontFace                                  front_face;
2768    VkPrimitiveTopology                          primitive_topology;
2769    bool                                         depth_test_enable;
2770    bool                                         depth_write_enable;
2771    VkCompareOp                                  depth_compare_op;
2772    bool                                         depth_bounds_test_enable;
2773    bool                                         stencil_test_enable;
2774    bool                                         dyn_vbo_stride;
2775    bool                                         dyn_vbo_size;
2776 };
2777 
2778 extern const struct anv_dynamic_state default_dynamic_state;
2779 
2780 uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
2781                                 const struct anv_dynamic_state *src,
2782                                 uint32_t copy_mask);
2783 
2784 struct anv_surface_state {
2785    struct anv_state state;
2786    /** Address of the surface referred to by this state
2787     *
2788     * This address is relative to the start of the BO.
2789     */
2790    struct anv_address address;
2791    /* Address of the aux surface, if any
2792     *
2793     * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
2794     *
2795     * With the exception of gen8, the bottom 12 bits of this address' offset
2796     * include extra aux information.
2797     */
2798    struct anv_address aux_address;
2799    /* Address of the clear color, if any
2800     *
2801     * This address is relative to the start of the BO.
2802     */
2803    struct anv_address clear_address;
2804 };
2805 
2806 /**
2807  * Attachment state when recording a renderpass instance.
2808  *
2809  * The clear value is valid only if there exists a pending clear.
2810  */
2811 struct anv_attachment_state {
2812    enum isl_aux_usage                           aux_usage;
2813    struct anv_surface_state                     color;
2814    struct anv_surface_state                     input;
2815 
2816    VkImageLayout                                current_layout;
2817    VkImageLayout                                current_stencil_layout;
2818    VkImageAspectFlags                           pending_clear_aspects;
2819    VkImageAspectFlags                           pending_load_aspects;
2820    bool                                         fast_clear;
2821    VkClearValue                                 clear_value;
2822 
2823    /* When multiview is active, attachments with a renderpass clear
2824     * operation have their respective layers cleared on the first
2825     * subpass that uses them, and only in that subpass. We keep track
2826     * of this using a bitfield to indicate which layers of an attachment
2827     * have not been cleared yet when multiview is active.
2828     */
2829    uint32_t                                     pending_clear_views;
2830    struct anv_image_view *                      image_view;
2831 };
2832 
2833 /** State tracking for vertex buffer flushes
2834  *
2835  * On Gen8-9, the VF cache only considers the bottom 32 bits of memory
2836  * addresses.  If you happen to have two vertex buffers which get placed
2837  * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
2838  * collisions.  In order to solve this problem, we track vertex address ranges
2839  * which are live in the cache and invalidate the cache if one ever exceeds 32
2840  * bits.
2841  */
2842 struct anv_vb_cache_range {
2843    /* Virtual address at which the live vertex buffer cache range starts for
2844     * this vertex buffer index.
2845     */
2846    uint64_t start;
2847 
2848    /* Virtual address of the byte after where vertex buffer cache range ends.
2849     * This is exclusive such that end - start is the size of the range.
2850     */
2851    uint64_t end;
2852 };
2853 
2854 /** State tracking for particular pipeline bind point
2855  *
2856  * This struct is the base struct for anv_cmd_graphics_state and
2857  * anv_cmd_compute_state.  These are used to track state which is bound to a
2858  * particular type of pipeline.  Generic state that applies per-stage such as
2859  * binding table offsets and push constants is tracked generically with a
2860  * per-stage array in anv_cmd_state.
2861  */
2862 struct anv_cmd_pipeline_state {
2863    struct anv_descriptor_set *descriptors[MAX_SETS];
2864    struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
2865 
2866    struct anv_push_constants push_constants;
2867 
2868    /* Push constant state allocated when flushing push constants. */
2869    struct anv_state          push_constants_state;
2870 };
2871 
2872 /** State tracking for graphics pipeline
2873  *
2874  * This has anv_cmd_pipeline_state as a base struct to track things which get
2875  * bound to a graphics pipeline.  Along with general pipeline bind point state
2876  * which is in the anv_cmd_pipeline_state base struct, it also contains other
2877  * state which is graphics-specific.
2878  */
2879 struct anv_cmd_graphics_state {
2880    struct anv_cmd_pipeline_state base;
2881 
2882    struct anv_graphics_pipeline *pipeline;
2883 
2884    anv_cmd_dirty_mask_t dirty;
2885    uint32_t vb_dirty;
2886 
2887    struct anv_vb_cache_range ib_bound_range;
2888    struct anv_vb_cache_range ib_dirty_range;
2889    struct anv_vb_cache_range vb_bound_ranges[33];
2890    struct anv_vb_cache_range vb_dirty_ranges[33];
2891 
2892    VkShaderStageFlags push_constant_stages;
2893 
2894    struct anv_dynamic_state dynamic;
2895 
2896    uint32_t primitive_topology;
2897 
2898    struct {
2899       struct anv_buffer *index_buffer;
2900       uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
2901       uint32_t index_offset;
2902    } gen7;
2903 };
2904 
2905 /** State tracking for compute pipeline
2906  *
2907  * This has anv_cmd_pipeline_state as a base struct to track things which get
2908  * bound to a compute pipeline.  Along with general pipeline bind point state
2909  * which is in the anv_cmd_pipeline_state base struct, it also contains other
2910  * state which is compute-specific.
2911  */
2912 struct anv_cmd_compute_state {
2913    struct anv_cmd_pipeline_state base;
2914 
2915    struct anv_compute_pipeline *pipeline;
2916 
2917    bool pipeline_dirty;
2918 
2919    struct anv_address num_workgroups;
2920 };
2921 
2922 /** State required while building cmd buffer */
2923 struct anv_cmd_state {
2924    /* PIPELINE_SELECT.PipelineSelection */
2925    uint32_t                                     current_pipeline;
2926    const struct gen_l3_config *                 current_l3_config;
2927    uint32_t                                     last_aux_map_state;
2928 
2929    struct anv_cmd_graphics_state                gfx;
2930    struct anv_cmd_compute_state                 compute;
2931 
2932    enum anv_pipe_bits                           pending_pipe_bits;
2933    VkShaderStageFlags                           descriptors_dirty;
2934    VkShaderStageFlags                           push_constants_dirty;
2935 
2936    struct anv_framebuffer *                     framebuffer;
2937    struct anv_render_pass *                     pass;
2938    struct anv_subpass *                         subpass;
2939    VkRect2D                                     render_area;
2940    uint32_t                                     restart_index;
2941    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
2942    bool                                         xfb_enabled;
2943    struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
2944    struct anv_state                             binding_tables[MESA_SHADER_STAGES];
2945    struct anv_state                             samplers[MESA_SHADER_STAGES];
2946 
2947    unsigned char                                sampler_sha1s[MESA_SHADER_STAGES][20];
2948    unsigned char                                surface_sha1s[MESA_SHADER_STAGES][20];
2949    unsigned char                                push_sha1s[MESA_SHADER_STAGES][20];
2950 
2951    /**
2952     * Whether or not the gen8 PMA fix is enabled.  We ensure that, at the top
2953     * of any command buffer it is disabled by disabling it in EndCommandBuffer
2954     * and before invoking the secondary in ExecuteCommands.
2955     */
2956    bool                                         pma_fix_enabled;
2957 
2958    /**
2959     * Whether or not we know for certain that HiZ is enabled for the current
2960     * subpass.  If, for whatever reason, we are unsure as to whether HiZ is
2961     * enabled or not, this will be false.
2962     */
2963    bool                                         hiz_enabled;
2964 
2965    bool                                         conditional_render_enabled;
2966 
2967    /**
2968     * Last rendering scale argument provided to
2969     * genX(cmd_buffer_emit_hashing_mode)().
2970     */
2971    unsigned                                     current_hash_scale;
2972 
2973    /**
2974     * Array length is anv_cmd_state::pass::attachment_count. Array content is
2975     * valid only when recording a render pass instance.
2976     */
2977    struct anv_attachment_state *                attachments;
2978 
2979    /**
2980     * Surface states for color render targets.  These are stored in a single
2981     * flat array.  For depth-stencil attachments, the surface state is simply
2982     * left blank.
2983     */
2984    struct anv_state                             attachment_states;
2985 
2986    /**
2987     * A null surface state of the right size to match the framebuffer.  This
2988     * is one of the states in attachment_states.
2989     */
2990    struct anv_state                             null_surface_state;
2991 };
2992 
2993 struct anv_cmd_pool {
2994    struct vk_object_base                        base;
2995    VkAllocationCallbacks                        alloc;
2996    struct list_head                             cmd_buffers;
2997 };
2998 
2999 #define ANV_CMD_BUFFER_BATCH_SIZE 8192
3000 
3001 enum anv_cmd_buffer_exec_mode {
3002    ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
3003    ANV_CMD_BUFFER_EXEC_MODE_EMIT,
3004    ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
3005    ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
3006    ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
3007    ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
3008 };
3009 
3010 struct anv_cmd_buffer {
3011    struct vk_object_base                        base;
3012 
3013    struct anv_device *                          device;
3014 
3015    struct anv_cmd_pool *                        pool;
3016    struct list_head                             pool_link;
3017 
3018    struct anv_batch                             batch;
3019 
3020    /* Fields required for the actual chain of anv_batch_bo's.
3021     *
3022     * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
3023     */
3024    struct list_head                             batch_bos;
3025    enum anv_cmd_buffer_exec_mode                exec_mode;
3026 
3027    /* A vector of anv_batch_bo pointers for every batch or surface buffer
3028     * referenced by this command buffer
3029     *
3030     * initialized by anv_cmd_buffer_init_batch_bo_chain()
3031     */
3032    struct u_vector                            seen_bbos;
3033 
3034    /* A vector of int32_t's for every block of binding tables.
3035     *
3036     * initialized by anv_cmd_buffer_init_batch_bo_chain()
3037     */
3038    struct u_vector                              bt_block_states;
3039    struct anv_state                             bt_next;
3040 
3041    struct anv_reloc_list                        surface_relocs;
3042    /** Last seen surface state block pool center bo offset */
3043    uint32_t                                     last_ss_pool_center;
3044 
3045    /* Serial for tracking buffer completion */
3046    uint32_t                                     serial;
3047 
3048    /* Stream objects for storing temporary data */
3049    struct anv_state_stream                      surface_state_stream;
3050    struct anv_state_stream                      dynamic_state_stream;
3051 
3052    VkCommandBufferUsageFlags                    usage_flags;
3053    VkCommandBufferLevel                         level;
3054 
3055    struct anv_query_pool                       *perf_query_pool;
3056 
3057    struct anv_cmd_state                         state;
3058 
3059    struct anv_address                           return_addr;
3060 
3061    /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
3062    uint64_t                                     intel_perf_marker;
3063 };
3064 
3065 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3066 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3067 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3068 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
3069 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
3070                                   struct anv_cmd_buffer *secondary);
3071 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
3072 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
3073                                 struct anv_cmd_buffer *cmd_buffer,
3074                                 const VkSemaphore *in_semaphores,
3075                                 const uint64_t *in_wait_values,
3076                                 uint32_t num_in_semaphores,
3077                                 const VkSemaphore *out_semaphores,
3078                                 const uint64_t *out_signal_values,
3079                                 uint32_t num_out_semaphores,
3080                                 VkFence fence,
3081                                 int perf_query_pass);
3082 
3083 VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
3084 
3085 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3086                                              const void *data, uint32_t size, uint32_t alignment);
3087 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3088                                               uint32_t *a, uint32_t *b,
3089                                               uint32_t dwords, uint32_t alignment);
3090 
3091 struct anv_address
3092 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
3093 struct anv_state
3094 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
3095                                    uint32_t entries, uint32_t *state_offset);
3096 struct anv_state
3097 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
3098 struct anv_state
3099 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
3100                                    uint32_t size, uint32_t alignment);
3101 
3102 VkResult
3103 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
3104 
3105 void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
3106 void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
3107                                          bool depth_clamp_enable);
3108 void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
3109 
3110 void anv_cmd_buffer_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
3111                                       struct anv_render_pass *pass,
3112                                       struct anv_framebuffer *framebuffer,
3113                                       const VkClearValue *clear_values);
3114 
3115 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
3116 
3117 struct anv_state
3118 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
3119 struct anv_state
3120 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
3121 
3122 const struct anv_image_view *
3123 anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
3124 
3125 VkResult
3126 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
3127                                          uint32_t num_entries,
3128                                          uint32_t *state_offset,
3129                                          struct anv_state *bt_state);
3130 
3131 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
3132 
3133 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
3134 
3135 enum anv_fence_type {
3136    ANV_FENCE_TYPE_NONE = 0,
3137    ANV_FENCE_TYPE_BO,
3138    ANV_FENCE_TYPE_WSI_BO,
3139    ANV_FENCE_TYPE_SYNCOBJ,
3140    ANV_FENCE_TYPE_WSI,
3141 };
3142 
3143 enum anv_bo_fence_state {
3144    /** Indicates that this is a new (or newly reset fence) */
3145    ANV_BO_FENCE_STATE_RESET,
3146 
3147    /** Indicates that this fence has been submitted to the GPU but is still
3148     * (as far as we know) in use by the GPU.
3149     */
3150    ANV_BO_FENCE_STATE_SUBMITTED,
3151 
3152    ANV_BO_FENCE_STATE_SIGNALED,
3153 };
3154 
3155 struct anv_fence_impl {
3156    enum anv_fence_type type;
3157 
3158    union {
3159       /** Fence implementation for BO fences
3160        *
3161        * These fences use a BO and a set of CPU-tracked state flags.  The BO
3162        * is added to the object list of the last execbuf call in a QueueSubmit
3163        * and is marked EXEC_WRITE.  The state flags track when the BO has been
3164        * submitted to the kernel.  We need to do this because Vulkan lets you
3165        * wait on a fence that has not yet been submitted and I915_GEM_BUSY
3166        * will say it's idle in this case.
3167        */
3168       struct {
3169          struct anv_bo *bo;
3170          enum anv_bo_fence_state state;
3171       } bo;
3172 
3173       /** DRM syncobj handle for syncobj-based fences */
3174       uint32_t syncobj;
3175 
3176       /** WSI fence */
3177       struct wsi_fence *fence_wsi;
3178    };
3179 };
3180 
3181 struct anv_fence {
3182    struct vk_object_base base;
3183 
3184    /* Permanent fence state.  Every fence has some form of permanent state
3185     * (type != ANV_SEMAPHORE_TYPE_NONE).  This may be a BO to fence on (for
3186     * cross-process fences) or it could just be a dummy for use internally.
3187     */
3188    struct anv_fence_impl permanent;
3189 
3190    /* Temporary fence state.  A fence *may* have temporary state.  That state
3191     * is added to the fence by an import operation and is reset back to
3192     * ANV_SEMAPHORE_TYPE_NONE when the fence is reset.  A fence with temporary
3193     * state cannot be signaled because the fence must already be signaled
3194     * before the temporary state can be exported from the fence in the other
3195     * process and imported here.
3196     */
3197    struct anv_fence_impl temporary;
3198 };
3199 
3200 void anv_fence_reset_temporary(struct anv_device *device,
3201                                struct anv_fence *fence);
3202 
3203 struct anv_event {
3204    struct vk_object_base                        base;
3205    uint64_t                                     semaphore;
3206    struct anv_state                             state;
3207 };
3208 
3209 enum anv_semaphore_type {
3210    ANV_SEMAPHORE_TYPE_NONE = 0,
3211    ANV_SEMAPHORE_TYPE_DUMMY,
3212    ANV_SEMAPHORE_TYPE_BO,
3213    ANV_SEMAPHORE_TYPE_WSI_BO,
3214    ANV_SEMAPHORE_TYPE_SYNC_FILE,
3215    ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
3216    ANV_SEMAPHORE_TYPE_TIMELINE,
3217    ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE,
3218 };
3219 
3220 struct anv_timeline_point {
3221    struct list_head link;
3222 
3223    uint64_t serial;
3224 
3225    /* Number of waiter on this point, when > 0 the point should not be garbage
3226     * collected.
3227     */
3228    int waiting;
3229 
3230    /* BO used for synchronization. */
3231    struct anv_bo *bo;
3232 };
3233 
3234 struct anv_timeline {
3235    pthread_mutex_t mutex;
3236    pthread_cond_t  cond;
3237 
3238    uint64_t highest_past;
3239    uint64_t highest_pending;
3240 
3241    struct list_head points;
3242    struct list_head free_points;
3243 };
3244 
3245 struct anv_semaphore_impl {
3246    enum anv_semaphore_type type;
3247 
3248    union {
3249       /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO
3250        * or type == ANV_SEMAPHORE_TYPE_WSI_BO.  This BO will be added to the
3251        * object list on any execbuf2 calls for which this semaphore is used as
3252        * a wait or signal fence.  When used as a signal fence or when type ==
3253        * ANV_SEMAPHORE_TYPE_WSI_BO, the EXEC_OBJECT_WRITE flag will be set.
3254        */
3255       struct anv_bo *bo;
3256 
3257       /* The sync file descriptor when type == ANV_SEMAPHORE_TYPE_SYNC_FILE.
3258        * If the semaphore is in the unsignaled state due to either just being
3259        * created or because it has been used for a wait, fd will be -1.
3260        */
3261       int fd;
3262 
3263       /* Sync object handle when type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ.
3264        * Unlike GEM BOs, DRM sync objects aren't deduplicated by the kernel on
3265        * import so we don't need to bother with a userspace cache.
3266        */
3267       uint32_t syncobj;
3268 
3269       /* Non shareable timeline semaphore
3270        *
3271        * Used when kernel don't have support for timeline semaphores.
3272        */
3273       struct anv_timeline timeline;
3274    };
3275 };
3276 
3277 struct anv_semaphore {
3278    struct vk_object_base base;
3279 
3280    uint32_t refcount;
3281 
3282    /* Permanent semaphore state.  Every semaphore has some form of permanent
3283     * state (type != ANV_SEMAPHORE_TYPE_NONE).  This may be a BO to fence on
3284     * (for cross-process semaphores0 or it could just be a dummy for use
3285     * internally.
3286     */
3287    struct anv_semaphore_impl permanent;
3288 
3289    /* Temporary semaphore state.  A semaphore *may* have temporary state.
3290     * That state is added to the semaphore by an import operation and is reset
3291     * back to ANV_SEMAPHORE_TYPE_NONE when the semaphore is waited on.  A
3292     * semaphore with temporary state cannot be signaled because the semaphore
3293     * must already be signaled before the temporary state can be exported from
3294     * the semaphore in the other process and imported here.
3295     */
3296    struct anv_semaphore_impl temporary;
3297 };
3298 
3299 void anv_semaphore_reset_temporary(struct anv_device *device,
3300                                    struct anv_semaphore *semaphore);
3301 
3302 struct anv_shader_module {
3303    struct vk_object_base                        base;
3304 
3305    unsigned char                                sha1[20];
3306    uint32_t                                     size;
3307    char                                         data[0];
3308 };
3309 
3310 static inline gl_shader_stage
vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)3311 vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
3312 {
3313    assert(__builtin_popcount(vk_stage) == 1);
3314    return ffs(vk_stage) - 1;
3315 }
3316 
3317 static inline VkShaderStageFlagBits
mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)3318 mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
3319 {
3320    return (1 << mesa_stage);
3321 }
3322 
3323 #define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
3324 
3325 #define anv_foreach_stage(stage, stage_bits)                         \
3326    for (gl_shader_stage stage,                                       \
3327         __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK);    \
3328         stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
3329         __tmp &= ~(1 << (stage)))
3330 
3331 enum anv_shader_reloc {
3332    ANV_SHADER_RELOC_CONST_DATA_ADDR_LOW,
3333    ANV_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
3334 };
3335 
3336 struct anv_pipeline_bind_map {
3337    unsigned char                                surface_sha1[20];
3338    unsigned char                                sampler_sha1[20];
3339    unsigned char                                push_sha1[20];
3340 
3341    uint32_t surface_count;
3342    uint32_t sampler_count;
3343 
3344    struct anv_pipeline_binding *                surface_to_descriptor;
3345    struct anv_pipeline_binding *                sampler_to_descriptor;
3346 
3347    struct anv_push_range                        push_ranges[4];
3348 };
3349 
3350 struct anv_shader_bin_key {
3351    uint32_t size;
3352    uint8_t data[0];
3353 };
3354 
3355 struct anv_shader_bin {
3356    uint32_t ref_cnt;
3357 
3358    gl_shader_stage stage;
3359 
3360    const struct anv_shader_bin_key *key;
3361 
3362    struct anv_state kernel;
3363    uint32_t kernel_size;
3364 
3365    const struct brw_stage_prog_data *prog_data;
3366    uint32_t prog_data_size;
3367 
3368    struct brw_compile_stats stats[3];
3369    uint32_t num_stats;
3370 
3371    struct nir_xfb_info *xfb_info;
3372 
3373    struct anv_pipeline_bind_map bind_map;
3374 };
3375 
3376 struct anv_shader_bin *
3377 anv_shader_bin_create(struct anv_device *device,
3378                       gl_shader_stage stage,
3379                       const void *key, uint32_t key_size,
3380                       const void *kernel, uint32_t kernel_size,
3381                       const struct brw_stage_prog_data *prog_data,
3382                       uint32_t prog_data_size,
3383                       const struct brw_compile_stats *stats, uint32_t num_stats,
3384                       const struct nir_xfb_info *xfb_info,
3385                       const struct anv_pipeline_bind_map *bind_map);
3386 
3387 void
3388 anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
3389 
3390 static inline void
anv_shader_bin_ref(struct anv_shader_bin * shader)3391 anv_shader_bin_ref(struct anv_shader_bin *shader)
3392 {
3393    assert(shader && shader->ref_cnt >= 1);
3394    p_atomic_inc(&shader->ref_cnt);
3395 }
3396 
3397 static inline void
anv_shader_bin_unref(struct anv_device * device,struct anv_shader_bin * shader)3398 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
3399 {
3400    assert(shader && shader->ref_cnt >= 1);
3401    if (p_atomic_dec_zero(&shader->ref_cnt))
3402       anv_shader_bin_destroy(device, shader);
3403 }
3404 
3405 struct anv_pipeline_executable {
3406    gl_shader_stage stage;
3407 
3408    struct brw_compile_stats stats;
3409 
3410    char *nir;
3411    char *disasm;
3412 };
3413 
3414 enum anv_pipeline_type {
3415    ANV_PIPELINE_GRAPHICS,
3416    ANV_PIPELINE_COMPUTE,
3417 };
3418 
3419 struct anv_pipeline {
3420    struct vk_object_base                        base;
3421 
3422    struct anv_device *                          device;
3423 
3424    struct anv_batch                             batch;
3425    struct anv_reloc_list                        batch_relocs;
3426 
3427    void *                                       mem_ctx;
3428 
3429    enum anv_pipeline_type                       type;
3430    VkPipelineCreateFlags                        flags;
3431 
3432    struct util_dynarray                         executables;
3433 
3434    const struct gen_l3_config *                 l3_config;
3435 };
3436 
3437 struct anv_graphics_pipeline {
3438    struct anv_pipeline                          base;
3439 
3440    uint32_t                                     batch_data[512];
3441 
3442    anv_cmd_dirty_mask_t                         dynamic_state_mask;
3443    struct anv_dynamic_state                     dynamic_state;
3444 
3445    uint32_t                                     topology;
3446 
3447    struct anv_subpass *                         subpass;
3448 
3449    struct anv_shader_bin *                      shaders[MESA_SHADER_STAGES];
3450 
3451    VkShaderStageFlags                           active_stages;
3452 
3453    bool                                         primitive_restart;
3454    bool                                         writes_depth;
3455    bool                                         depth_test_enable;
3456    bool                                         writes_stencil;
3457    bool                                         stencil_test_enable;
3458    bool                                         depth_clamp_enable;
3459    bool                                         depth_clip_enable;
3460    bool                                         sample_shading_enable;
3461    bool                                         kill_pixel;
3462    bool                                         depth_bounds_test_enable;
3463 
3464    /* When primitive replication is used, subpass->view_mask will describe what
3465     * views to replicate.
3466     */
3467    bool                                         use_primitive_replication;
3468 
3469    struct anv_state                             blend_state;
3470 
3471    uint32_t                                     vb_used;
3472    struct anv_pipeline_vertex_binding {
3473       uint32_t                                  stride;
3474       bool                                      instanced;
3475       uint32_t                                  instance_divisor;
3476    } vb[MAX_VBS];
3477 
3478    struct {
3479       uint32_t                                  sf[7];
3480       uint32_t                                  depth_stencil_state[3];
3481       uint32_t                                  clip[4];
3482       uint32_t                                  xfb_bo_pitch[4];
3483    } gen7;
3484 
3485    struct {
3486       uint32_t                                  sf[4];
3487       uint32_t                                  raster[5];
3488       uint32_t                                  wm_depth_stencil[3];
3489    } gen8;
3490 
3491    struct {
3492       uint32_t                                  wm_depth_stencil[4];
3493    } gen9;
3494 };
3495 
3496 struct anv_compute_pipeline {
3497    struct anv_pipeline                          base;
3498 
3499    struct anv_shader_bin *                      cs;
3500    uint32_t                                     cs_right_mask;
3501    uint32_t                                     batch_data[9];
3502    uint32_t                                     interface_descriptor_data[8];
3503 };
3504 
3505 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
3506    static inline struct anv_##pipe_type##_pipeline *                 \
3507    anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)      \
3508    {                                                                 \
3509       assert(pipeline->type == pipe_enum);                           \
3510       return (struct anv_##pipe_type##_pipeline *) pipeline;         \
3511    }
3512 
ANV_DECL_PIPELINE_DOWNCAST(graphics,ANV_PIPELINE_GRAPHICS)3513 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
3514 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
3515 
3516 static inline bool
3517 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
3518                        gl_shader_stage stage)
3519 {
3520    return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
3521 }
3522 
3523 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)             \
3524 static inline const struct brw_##prefix##_prog_data *                   \
3525 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline)  \
3526 {                                                                       \
3527    if (anv_pipeline_has_stage(pipeline, stage)) {                       \
3528       return (const struct brw_##prefix##_prog_data *)                  \
3529              pipeline->shaders[stage]->prog_data;                       \
3530    } else {                                                             \
3531       return NULL;                                                      \
3532    }                                                                    \
3533 }
3534 
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs,MESA_SHADER_VERTEX)3535 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
3536 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
3537 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
3538 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
3539 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
3540 
3541 static inline const struct brw_cs_prog_data *
3542 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
3543 {
3544    assert(pipeline->cs);
3545    return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
3546 }
3547 
3548 static inline const struct brw_vue_prog_data *
anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline * pipeline)3549 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
3550 {
3551    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
3552       return &get_gs_prog_data(pipeline)->base;
3553    else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
3554       return &get_tes_prog_data(pipeline)->base;
3555    else
3556       return &get_vs_prog_data(pipeline)->base;
3557 }
3558 
3559 VkResult
3560 anv_pipeline_init(struct anv_pipeline *pipeline,
3561                   struct anv_device *device,
3562                   enum anv_pipeline_type type,
3563                   VkPipelineCreateFlags flags,
3564                   const VkAllocationCallbacks *pAllocator);
3565 
3566 void
3567 anv_pipeline_finish(struct anv_pipeline *pipeline,
3568                     struct anv_device *device,
3569                     const VkAllocationCallbacks *pAllocator);
3570 
3571 VkResult
3572 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device,
3573                            struct anv_pipeline_cache *cache,
3574                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
3575                            const VkAllocationCallbacks *alloc);
3576 
3577 VkResult
3578 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
3579                         struct anv_pipeline_cache *cache,
3580                         const VkComputePipelineCreateInfo *info,
3581                         const struct anv_shader_module *module,
3582                         const char *entrypoint,
3583                         const VkSpecializationInfo *spec_info);
3584 
3585 struct anv_cs_parameters {
3586    uint32_t group_size;
3587    uint32_t simd_size;
3588    uint32_t threads;
3589 };
3590 
3591 struct anv_cs_parameters
3592 anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
3593 
3594 struct anv_format_plane {
3595    enum isl_format isl_format:16;
3596    struct isl_swizzle swizzle;
3597 
3598    /* Whether this plane contains chroma channels */
3599    bool has_chroma;
3600 
3601    /* For downscaling of YUV planes */
3602    uint8_t denominator_scales[2];
3603 
3604    /* How to map sampled ycbcr planes to a single 4 component element. */
3605    struct isl_swizzle ycbcr_swizzle;
3606 
3607    /* What aspect is associated to this plane */
3608    VkImageAspectFlags aspect;
3609 };
3610 
3611 
3612 struct anv_format {
3613    struct anv_format_plane planes[3];
3614    VkFormat vk_format;
3615    uint8_t n_planes;
3616    bool can_ycbcr;
3617 };
3618 
3619 /**
3620  * Return the aspect's _format_ plane, not its _memory_ plane (using the
3621  * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
3622  * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
3623  * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
3624  */
3625 static inline uint32_t
anv_image_aspect_to_plane(VkImageAspectFlags image_aspects,VkImageAspectFlags aspect_mask)3626 anv_image_aspect_to_plane(VkImageAspectFlags image_aspects,
3627                           VkImageAspectFlags aspect_mask)
3628 {
3629    switch (aspect_mask) {
3630    case VK_IMAGE_ASPECT_COLOR_BIT:
3631    case VK_IMAGE_ASPECT_DEPTH_BIT:
3632    case VK_IMAGE_ASPECT_PLANE_0_BIT:
3633       return 0;
3634    case VK_IMAGE_ASPECT_STENCIL_BIT:
3635       if ((image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) == 0)
3636          return 0;
3637       /* Fall-through */
3638    case VK_IMAGE_ASPECT_PLANE_1_BIT:
3639       return 1;
3640    case VK_IMAGE_ASPECT_PLANE_2_BIT:
3641       return 2;
3642    default:
3643       /* Purposefully assert with depth/stencil aspects. */
3644       unreachable("invalid image aspect");
3645    }
3646 }
3647 
3648 static inline VkImageAspectFlags
anv_plane_to_aspect(VkImageAspectFlags image_aspects,uint32_t plane)3649 anv_plane_to_aspect(VkImageAspectFlags image_aspects,
3650                     uint32_t plane)
3651 {
3652    if (image_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
3653       if (util_bitcount(image_aspects) > 1)
3654          return VK_IMAGE_ASPECT_PLANE_0_BIT << plane;
3655       return VK_IMAGE_ASPECT_COLOR_BIT;
3656    }
3657    if (image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
3658       return VK_IMAGE_ASPECT_DEPTH_BIT << plane;
3659    assert(image_aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
3660    return VK_IMAGE_ASPECT_STENCIL_BIT;
3661 }
3662 
3663 #define anv_foreach_image_aspect_bit(b, image, aspects) \
3664    for_each_bit(b, anv_image_expand_aspects(image, aspects))
3665 
3666 const struct anv_format *
3667 anv_get_format(VkFormat format);
3668 
3669 static inline uint32_t
anv_get_format_planes(VkFormat vk_format)3670 anv_get_format_planes(VkFormat vk_format)
3671 {
3672    const struct anv_format *format = anv_get_format(vk_format);
3673 
3674    return format != NULL ? format->n_planes : 0;
3675 }
3676 
3677 struct anv_format_plane
3678 anv_get_format_plane(const struct gen_device_info *devinfo, VkFormat vk_format,
3679                      VkImageAspectFlagBits aspect, VkImageTiling tiling);
3680 
3681 static inline enum isl_format
anv_get_isl_format(const struct gen_device_info * devinfo,VkFormat vk_format,VkImageAspectFlags aspect,VkImageTiling tiling)3682 anv_get_isl_format(const struct gen_device_info *devinfo, VkFormat vk_format,
3683                    VkImageAspectFlags aspect, VkImageTiling tiling)
3684 {
3685    return anv_get_format_plane(devinfo, vk_format, aspect, tiling).isl_format;
3686 }
3687 
3688 bool anv_formats_ccs_e_compatible(const struct gen_device_info *devinfo,
3689                                   VkImageCreateFlags create_flags,
3690                                   VkFormat vk_format,
3691                                   VkImageTiling vk_tiling,
3692                                   const VkImageFormatListCreateInfoKHR *fmt_list);
3693 
3694 static inline struct isl_swizzle
anv_swizzle_for_render(struct isl_swizzle swizzle)3695 anv_swizzle_for_render(struct isl_swizzle swizzle)
3696 {
3697    /* Sometimes the swizzle will have alpha map to one.  We do this to fake
3698     * RGB as RGBA for texturing
3699     */
3700    assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
3701           swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
3702 
3703    /* But it doesn't matter what we render to that channel */
3704    swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
3705 
3706    return swizzle;
3707 }
3708 
3709 void
3710 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
3711 
3712 /**
3713  * Subsurface of an anv_image.
3714  */
3715 struct anv_surface {
3716    /** Valid only if isl_surf::size_B > 0. */
3717    struct isl_surf isl;
3718 
3719    /**
3720     * Offset from VkImage's base address, as bound by vkBindImageMemory().
3721     */
3722    uint32_t offset;
3723 };
3724 
3725 struct anv_image {
3726    struct vk_object_base base;
3727 
3728    VkImageType type; /**< VkImageCreateInfo::imageType */
3729    /* The original VkFormat provided by the client.  This may not match any
3730     * of the actual surface formats.
3731     */
3732    VkFormat vk_format;
3733    const struct anv_format *format;
3734 
3735    VkImageAspectFlags aspects;
3736    VkExtent3D extent;
3737    uint32_t levels;
3738    uint32_t array_size;
3739    uint32_t samples; /**< VkImageCreateInfo::samples */
3740    uint32_t n_planes;
3741    VkImageUsageFlags usage; /**< VkImageCreateInfo::usage. */
3742    VkImageUsageFlags stencil_usage;
3743    VkImageCreateFlags create_flags; /* Flags used when creating image. */
3744    VkImageTiling tiling; /** VkImageCreateInfo::tiling */
3745 
3746    /** True if this is needs to be bound to an appropriately tiled BO.
3747     *
3748     * When not using modifiers, consumers such as X11, Wayland, and KMS need
3749     * the tiling passed via I915_GEM_SET_TILING.  When exporting these buffers
3750     * we require a dedicated allocation so that we can know to allocate a
3751     * tiled buffer.
3752     */
3753    bool needs_set_tiling;
3754 
3755    /**
3756     * Must be DRM_FORMAT_MOD_INVALID unless tiling is
3757     * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT.
3758     */
3759    uint64_t drm_format_mod;
3760 
3761    VkDeviceSize size;
3762    uint32_t alignment;
3763 
3764    /* Whether the image is made of several underlying buffer objects rather a
3765     * single one with different offsets.
3766     */
3767    bool disjoint;
3768 
3769    /* Image was created with external format. */
3770    bool external_format;
3771 
3772    /**
3773     * Image subsurfaces
3774     *
3775     * For each foo, anv_image::planes[x].surface is valid if and only if
3776     * anv_image::aspects has a x aspect. Refer to anv_image_aspect_to_plane()
3777     * to figure the number associated with a given aspect.
3778     *
3779     * The hardware requires that the depth buffer and stencil buffer be
3780     * separate surfaces.  From Vulkan's perspective, though, depth and stencil
3781     * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
3782     * allocate the depth and stencil buffers as separate surfaces in the same
3783     * bo.
3784     *
3785     * Memory layout :
3786     *
3787     * -----------------------
3788     * |     surface0        |   /|\
3789     * -----------------------    |
3790     * |   shadow surface0   |    |
3791     * -----------------------    | Plane 0
3792     * |    aux surface0     |    |
3793     * -----------------------    |
3794     * | fast clear colors0  |   \|/
3795     * -----------------------
3796     * |     surface1        |   /|\
3797     * -----------------------    |
3798     * |   shadow surface1   |    |
3799     * -----------------------    | Plane 1
3800     * |    aux surface1     |    |
3801     * -----------------------    |
3802     * | fast clear colors1  |   \|/
3803     * -----------------------
3804     * |        ...          |
3805     * |                     |
3806     * -----------------------
3807     */
3808    struct {
3809       /**
3810        * Offset of the entire plane (whenever the image is disjoint this is
3811        * set to 0).
3812        */
3813       uint32_t offset;
3814 
3815       VkDeviceSize size;
3816       uint32_t alignment;
3817 
3818       struct anv_surface surface;
3819 
3820       /**
3821        * A surface which shadows the main surface and may have different
3822        * tiling. This is used for sampling using a tiling that isn't supported
3823        * for other operations.
3824        */
3825       struct anv_surface shadow_surface;
3826 
3827       /**
3828        * The base aux usage for this image.  For color images, this can be
3829        * either CCS_E or CCS_D depending on whether or not we can reliably
3830        * leave CCS on all the time.
3831        */
3832       enum isl_aux_usage aux_usage;
3833 
3834       struct anv_surface aux_surface;
3835 
3836       /**
3837        * Offset of the fast clear state (used to compute the
3838        * fast_clear_state_offset of the following planes).
3839        */
3840       uint32_t fast_clear_state_offset;
3841 
3842       /**
3843        * BO associated with this plane, set when bound.
3844        */
3845       struct anv_address address;
3846 
3847       /**
3848        * When destroying the image, also free the bo.
3849        * */
3850       bool bo_is_owned;
3851    } planes[3];
3852 };
3853 
3854 /* The ordering of this enum is important */
3855 enum anv_fast_clear_type {
3856    /** Image does not have/support any fast-clear blocks */
3857    ANV_FAST_CLEAR_NONE = 0,
3858    /** Image has/supports fast-clear but only to the default value */
3859    ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
3860    /** Image has/supports fast-clear with an arbitrary fast-clear value */
3861    ANV_FAST_CLEAR_ANY = 2,
3862 };
3863 
3864 /* Returns the number of auxiliary buffer levels attached to an image. */
3865 static inline uint8_t
anv_image_aux_levels(const struct anv_image * const image,VkImageAspectFlagBits aspect)3866 anv_image_aux_levels(const struct anv_image * const image,
3867                      VkImageAspectFlagBits aspect)
3868 {
3869    uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
3870    if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
3871       return 0;
3872 
3873    return image->levels;
3874 }
3875 
3876 /* Returns the number of auxiliary buffer layers attached to an image. */
3877 static inline uint32_t
anv_image_aux_layers(const struct anv_image * const image,VkImageAspectFlagBits aspect,const uint8_t miplevel)3878 anv_image_aux_layers(const struct anv_image * const image,
3879                      VkImageAspectFlagBits aspect,
3880                      const uint8_t miplevel)
3881 {
3882    assert(image);
3883 
3884    /* The miplevel must exist in the main buffer. */
3885    assert(miplevel < image->levels);
3886 
3887    if (miplevel >= anv_image_aux_levels(image, aspect)) {
3888       /* There are no layers with auxiliary data because the miplevel has no
3889        * auxiliary data.
3890        */
3891       return 0;
3892    }
3893 
3894    return MAX2(image->array_size, image->extent.depth >> miplevel);
3895 }
3896 
3897 static inline struct anv_address
anv_image_get_clear_color_addr(UNUSED const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect)3898 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
3899                                const struct anv_image *image,
3900                                VkImageAspectFlagBits aspect)
3901 {
3902    assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
3903 
3904    uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
3905    return anv_address_add(image->planes[plane].address,
3906                           image->planes[plane].fast_clear_state_offset);
3907 }
3908 
3909 static inline struct anv_address
anv_image_get_fast_clear_type_addr(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect)3910 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
3911                                    const struct anv_image *image,
3912                                    VkImageAspectFlagBits aspect)
3913 {
3914    struct anv_address addr =
3915       anv_image_get_clear_color_addr(device, image, aspect);
3916 
3917    const unsigned clear_color_state_size = device->info.gen >= 10 ?
3918       device->isl_dev.ss.clear_color_state_size :
3919       device->isl_dev.ss.clear_value_size;
3920    return anv_address_add(addr, clear_color_state_size);
3921 }
3922 
3923 static inline struct anv_address
anv_image_get_compression_state_addr(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect,uint32_t level,uint32_t array_layer)3924 anv_image_get_compression_state_addr(const struct anv_device *device,
3925                                      const struct anv_image *image,
3926                                      VkImageAspectFlagBits aspect,
3927                                      uint32_t level, uint32_t array_layer)
3928 {
3929    assert(level < anv_image_aux_levels(image, aspect));
3930    assert(array_layer < anv_image_aux_layers(image, aspect, level));
3931    UNUSED uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
3932    assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);
3933 
3934    struct anv_address addr =
3935       anv_image_get_fast_clear_type_addr(device, image, aspect);
3936    addr.offset += 4; /* Go past the fast clear type */
3937 
3938    if (image->type == VK_IMAGE_TYPE_3D) {
3939       for (uint32_t l = 0; l < level; l++)
3940          addr.offset += anv_minify(image->extent.depth, l) * 4;
3941    } else {
3942       addr.offset += level * image->array_size * 4;
3943    }
3944    addr.offset += array_layer * 4;
3945 
3946    assert(addr.offset <
3947           image->planes[plane].address.offset + image->planes[plane].size);
3948    return addr;
3949 }
3950 
3951 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
3952 static inline bool
anv_can_sample_with_hiz(const struct gen_device_info * const devinfo,const struct anv_image * image)3953 anv_can_sample_with_hiz(const struct gen_device_info * const devinfo,
3954                         const struct anv_image *image)
3955 {
3956    if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
3957       return false;
3958 
3959    /* For Gen8-11, there are some restrictions around sampling from HiZ.
3960     * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
3961     * say:
3962     *
3963     *    "If this field is set to AUX_HIZ, Number of Multisamples must
3964     *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
3965     */
3966    if (image->type == VK_IMAGE_TYPE_3D)
3967       return false;
3968 
3969    /* Allow this feature on BDW even though it is disabled in the BDW devinfo
3970     * struct. There's documentation which suggests that this feature actually
3971     * reduces performance on BDW, but it has only been observed to help so
3972     * far. Sampling fast-cleared blocks on BDW must also be handled with care
3973     * (see depth_stencil_attachment_compute_aux_usage() for more info).
3974     */
3975    if (devinfo->gen != 8 && !devinfo->has_sample_with_hiz)
3976       return false;
3977 
3978    return image->samples == 1;
3979 }
3980 
3981 static inline bool
anv_image_plane_uses_aux_map(const struct anv_device * device,const struct anv_image * image,uint32_t plane)3982 anv_image_plane_uses_aux_map(const struct anv_device *device,
3983                              const struct anv_image *image,
3984                              uint32_t plane)
3985 {
3986    return device->info.has_aux_map &&
3987       isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
3988 }
3989 
3990 void
3991 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
3992                                   const struct anv_image *image,
3993                                   VkImageAspectFlagBits aspect,
3994                                   enum isl_aux_usage aux_usage,
3995                                   uint32_t level,
3996                                   uint32_t base_layer,
3997                                   uint32_t layer_count);
3998 
3999 void
4000 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
4001                       const struct anv_image *image,
4002                       VkImageAspectFlagBits aspect,
4003                       enum isl_aux_usage aux_usage,
4004                       enum isl_format format, struct isl_swizzle swizzle,
4005                       uint32_t level, uint32_t base_layer, uint32_t layer_count,
4006                       VkRect2D area, union isl_color_value clear_color);
4007 void
4008 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
4009                               const struct anv_image *image,
4010                               VkImageAspectFlags aspects,
4011                               enum isl_aux_usage depth_aux_usage,
4012                               uint32_t level,
4013                               uint32_t base_layer, uint32_t layer_count,
4014                               VkRect2D area,
4015                               float depth_value, uint8_t stencil_value);
4016 void
4017 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
4018                        const struct anv_image *src_image,
4019                        enum isl_aux_usage src_aux_usage,
4020                        uint32_t src_level, uint32_t src_base_layer,
4021                        const struct anv_image *dst_image,
4022                        enum isl_aux_usage dst_aux_usage,
4023                        uint32_t dst_level, uint32_t dst_base_layer,
4024                        VkImageAspectFlagBits aspect,
4025                        uint32_t src_x, uint32_t src_y,
4026                        uint32_t dst_x, uint32_t dst_y,
4027                        uint32_t width, uint32_t height,
4028                        uint32_t layer_count,
4029                        enum blorp_filter filter);
4030 void
4031 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
4032                  const struct anv_image *image,
4033                  VkImageAspectFlagBits aspect, uint32_t level,
4034                  uint32_t base_layer, uint32_t layer_count,
4035                  enum isl_aux_op hiz_op);
4036 void
4037 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
4038                     const struct anv_image *image,
4039                     VkImageAspectFlags aspects,
4040                     uint32_t level,
4041                     uint32_t base_layer, uint32_t layer_count,
4042                     VkRect2D area, uint8_t stencil_value);
4043 void
4044 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
4045                  const struct anv_image *image,
4046                  enum isl_format format, struct isl_swizzle swizzle,
4047                  VkImageAspectFlagBits aspect,
4048                  uint32_t base_layer, uint32_t layer_count,
4049                  enum isl_aux_op mcs_op, union isl_color_value *clear_value,
4050                  bool predicate);
4051 void
4052 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
4053                  const struct anv_image *image,
4054                  enum isl_format format, struct isl_swizzle swizzle,
4055                  VkImageAspectFlagBits aspect, uint32_t level,
4056                  uint32_t base_layer, uint32_t layer_count,
4057                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
4058                  bool predicate);
4059 
4060 void
4061 anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
4062                          const struct anv_image *image,
4063                          VkImageAspectFlagBits aspect,
4064                          uint32_t base_level, uint32_t level_count,
4065                          uint32_t base_layer, uint32_t layer_count);
4066 
4067 enum isl_aux_state
4068 anv_layout_to_aux_state(const struct gen_device_info * const devinfo,
4069                         const struct anv_image *image,
4070                         const VkImageAspectFlagBits aspect,
4071                         const VkImageLayout layout);
4072 
4073 enum isl_aux_usage
4074 anv_layout_to_aux_usage(const struct gen_device_info * const devinfo,
4075                         const struct anv_image *image,
4076                         const VkImageAspectFlagBits aspect,
4077                         const VkImageUsageFlagBits usage,
4078                         const VkImageLayout layout);
4079 
4080 enum anv_fast_clear_type
4081 anv_layout_to_fast_clear_type(const struct gen_device_info * const devinfo,
4082                               const struct anv_image * const image,
4083                               const VkImageAspectFlagBits aspect,
4084                               const VkImageLayout layout);
4085 
4086 /* This is defined as a macro so that it works for both
4087  * VkImageSubresourceRange and VkImageSubresourceLayers
4088  */
4089 #define anv_get_layerCount(_image, _range) \
4090    ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
4091     (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)
4092 
4093 static inline uint32_t
anv_get_levelCount(const struct anv_image * image,const VkImageSubresourceRange * range)4094 anv_get_levelCount(const struct anv_image *image,
4095                    const VkImageSubresourceRange *range)
4096 {
4097    return range->levelCount == VK_REMAINING_MIP_LEVELS ?
4098           image->levels - range->baseMipLevel : range->levelCount;
4099 }
4100 
4101 static inline VkImageAspectFlags
anv_image_expand_aspects(const struct anv_image * image,VkImageAspectFlags aspects)4102 anv_image_expand_aspects(const struct anv_image *image,
4103                          VkImageAspectFlags aspects)
4104 {
4105    /* If the underlying image has color plane aspects and
4106     * VK_IMAGE_ASPECT_COLOR_BIT has been requested, then return the aspects of
4107     * the underlying image. */
4108    if ((image->aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) != 0 &&
4109        aspects == VK_IMAGE_ASPECT_COLOR_BIT)
4110       return image->aspects;
4111 
4112    return aspects;
4113 }
4114 
4115 static inline bool
anv_image_aspects_compatible(VkImageAspectFlags aspects1,VkImageAspectFlags aspects2)4116 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
4117                              VkImageAspectFlags aspects2)
4118 {
4119    if (aspects1 == aspects2)
4120       return true;
4121 
4122    /* Only 1 color aspects are compatibles. */
4123    if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4124        (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4125        util_bitcount(aspects1) == util_bitcount(aspects2))
4126       return true;
4127 
4128    return false;
4129 }
4130 
4131 struct anv_image_view {
4132    struct vk_object_base base;
4133 
4134    const struct anv_image *image; /**< VkImageViewCreateInfo::image */
4135 
4136    VkImageAspectFlags aspect_mask;
4137    VkFormat vk_format;
4138    VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
4139 
4140    unsigned n_planes;
4141    struct {
4142       uint32_t image_plane;
4143 
4144       struct isl_view isl;
4145 
4146       /**
4147        * RENDER_SURFACE_STATE when using image as a sampler surface with an
4148        * image layout of SHADER_READ_ONLY_OPTIMAL or
4149        * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
4150        */
4151       struct anv_surface_state optimal_sampler_surface_state;
4152 
4153       /**
4154        * RENDER_SURFACE_STATE when using image as a sampler surface with an
4155        * image layout of GENERAL.
4156        */
4157       struct anv_surface_state general_sampler_surface_state;
4158 
4159       /**
4160        * RENDER_SURFACE_STATE when using image as a storage image. Separate
4161        * states for write-only and readable, using the real format for
4162        * write-only and the lowered format for readable.
4163        */
4164       struct anv_surface_state storage_surface_state;
4165       struct anv_surface_state writeonly_storage_surface_state;
4166 
4167       struct brw_image_param storage_image_param;
4168    } planes[3];
4169 };
4170 
4171 enum anv_image_view_state_flags {
4172    ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY   = (1 << 0),
4173    ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 1),
4174 };
4175 
4176 void anv_image_fill_surface_state(struct anv_device *device,
4177                                   const struct anv_image *image,
4178                                   VkImageAspectFlagBits aspect,
4179                                   const struct isl_view *view,
4180                                   isl_surf_usage_flags_t view_usage,
4181                                   enum isl_aux_usage aux_usage,
4182                                   const union isl_color_value *clear_color,
4183                                   enum anv_image_view_state_flags flags,
4184                                   struct anv_surface_state *state_inout,
4185                                   struct brw_image_param *image_param_out);
4186 
4187 struct anv_image_create_info {
4188    const VkImageCreateInfo *vk_info;
4189 
4190    /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
4191    isl_tiling_flags_t isl_tiling_flags;
4192 
4193    /** These flags will be added to any derived from VkImageCreateInfo. */
4194    isl_surf_usage_flags_t isl_extra_usage_flags;
4195 
4196    uint32_t stride;
4197    bool external_format;
4198 };
4199 
4200 VkResult anv_image_create(VkDevice _device,
4201                           const struct anv_image_create_info *info,
4202                           const VkAllocationCallbacks* alloc,
4203                           VkImage *pImage);
4204 
4205 enum isl_format
4206 anv_isl_format_for_descriptor_type(const struct anv_device *device,
4207                                    VkDescriptorType type);
4208 
4209 static inline VkExtent3D
anv_sanitize_image_extent(const VkImageType imageType,const VkExtent3D imageExtent)4210 anv_sanitize_image_extent(const VkImageType imageType,
4211                           const VkExtent3D imageExtent)
4212 {
4213    switch (imageType) {
4214    case VK_IMAGE_TYPE_1D:
4215       return (VkExtent3D) { imageExtent.width, 1, 1 };
4216    case VK_IMAGE_TYPE_2D:
4217       return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
4218    case VK_IMAGE_TYPE_3D:
4219       return imageExtent;
4220    default:
4221       unreachable("invalid image type");
4222    }
4223 }
4224 
4225 static inline VkOffset3D
anv_sanitize_image_offset(const VkImageType imageType,const VkOffset3D imageOffset)4226 anv_sanitize_image_offset(const VkImageType imageType,
4227                           const VkOffset3D imageOffset)
4228 {
4229    switch (imageType) {
4230    case VK_IMAGE_TYPE_1D:
4231       return (VkOffset3D) { imageOffset.x, 0, 0 };
4232    case VK_IMAGE_TYPE_2D:
4233       return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
4234    case VK_IMAGE_TYPE_3D:
4235       return imageOffset;
4236    default:
4237       unreachable("invalid image type");
4238    }
4239 }
4240 
4241 VkFormatFeatureFlags
4242 anv_get_image_format_features(const struct gen_device_info *devinfo,
4243                               VkFormat vk_format,
4244                               const struct anv_format *anv_format,
4245                               VkImageTiling vk_tiling);
4246 
4247 void anv_fill_buffer_surface_state(struct anv_device *device,
4248                                    struct anv_state state,
4249                                    enum isl_format format,
4250                                    isl_surf_usage_flags_t usage,
4251                                    struct anv_address address,
4252                                    uint32_t range, uint32_t stride);
4253 
4254 static inline void
anv_clear_color_from_att_state(union isl_color_value * clear_color,const struct anv_attachment_state * att_state,const struct anv_image_view * iview)4255 anv_clear_color_from_att_state(union isl_color_value *clear_color,
4256                                const struct anv_attachment_state *att_state,
4257                                const struct anv_image_view *iview)
4258 {
4259    const struct isl_format_layout *view_fmtl =
4260       isl_format_get_layout(iview->planes[0].isl.format);
4261 
4262 #define COPY_CLEAR_COLOR_CHANNEL(c, i) \
4263    if (view_fmtl->channels.c.bits) \
4264       clear_color->u32[i] = att_state->clear_value.color.uint32[i]
4265 
4266    COPY_CLEAR_COLOR_CHANNEL(r, 0);
4267    COPY_CLEAR_COLOR_CHANNEL(g, 1);
4268    COPY_CLEAR_COLOR_CHANNEL(b, 2);
4269    COPY_CLEAR_COLOR_CHANNEL(a, 3);
4270 
4271 #undef COPY_CLEAR_COLOR_CHANNEL
4272 }
4273 
4274 
4275 /* Haswell border color is a bit of a disaster.  Float and unorm formats use a
4276  * straightforward 32-bit float color in the first 64 bytes.  Instead of using
4277  * a nice float/integer union like Gen8+, Haswell specifies the integer border
4278  * color as a separate entry /after/ the float color.  The layout of this entry
4279  * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
4280  *
4281  * Since we don't know the format/bpp, we can't make any of the border colors
4282  * containing '1' work for all formats, as it would be in the wrong place for
4283  * some of them.  We opt to make 32-bit integers work as this seems like the
4284  * most common option.  Fortunately, transparent black works regardless, as
4285  * all zeroes is the same in every bit-size.
4286  */
4287 struct hsw_border_color {
4288    float float32[4];
4289    uint32_t _pad0[12];
4290    uint32_t uint32[4];
4291    uint32_t _pad1[108];
4292 };
4293 
4294 struct gen8_border_color {
4295    union {
4296       float float32[4];
4297       uint32_t uint32[4];
4298    };
4299    /* Pad out to 64 bytes */
4300    uint32_t _pad[12];
4301 };
4302 
4303 struct anv_ycbcr_conversion {
4304    struct vk_object_base base;
4305 
4306    const struct anv_format *        format;
4307    VkSamplerYcbcrModelConversion    ycbcr_model;
4308    VkSamplerYcbcrRange              ycbcr_range;
4309    VkComponentSwizzle               mapping[4];
4310    VkChromaLocation                 chroma_offsets[2];
4311    VkFilter                         chroma_filter;
4312    bool                             chroma_reconstruction;
4313 };
4314 
4315 struct anv_sampler {
4316    struct vk_object_base        base;
4317 
4318    uint32_t                     state[3][4];
4319    uint32_t                     n_planes;
4320    struct anv_ycbcr_conversion *conversion;
4321 
4322    /* Blob of sampler state data which is guaranteed to be 32-byte aligned
4323     * and with a 32-byte stride for use as bindless samplers.
4324     */
4325    struct anv_state             bindless_state;
4326 
4327    struct anv_state             custom_border_color;
4328 };
4329 
4330 struct anv_framebuffer {
4331    struct vk_object_base                        base;
4332 
4333    uint32_t                                     width;
4334    uint32_t                                     height;
4335    uint32_t                                     layers;
4336 
4337    uint32_t                                     attachment_count;
4338    struct anv_image_view *                      attachments[0];
4339 };
4340 
4341 struct anv_subpass_attachment {
4342    VkImageUsageFlagBits usage;
4343    uint32_t attachment;
4344    VkImageLayout layout;
4345 
4346    /* Used only with attachment containing stencil data. */
4347    VkImageLayout stencil_layout;
4348 };
4349 
4350 struct anv_subpass {
4351    uint32_t                                     attachment_count;
4352 
4353    /**
4354     * A pointer to all attachment references used in this subpass.
4355     * Only valid if ::attachment_count > 0.
4356     */
4357    struct anv_subpass_attachment *              attachments;
4358    uint32_t                                     input_count;
4359    struct anv_subpass_attachment *              input_attachments;
4360    uint32_t                                     color_count;
4361    struct anv_subpass_attachment *              color_attachments;
4362    struct anv_subpass_attachment *              resolve_attachments;
4363 
4364    struct anv_subpass_attachment *              depth_stencil_attachment;
4365    struct anv_subpass_attachment *              ds_resolve_attachment;
4366    VkResolveModeFlagBitsKHR                     depth_resolve_mode;
4367    VkResolveModeFlagBitsKHR                     stencil_resolve_mode;
4368 
4369    uint32_t                                     view_mask;
4370 
4371    /** Subpass has a depth/stencil self-dependency */
4372    bool                                         has_ds_self_dep;
4373 
4374    /** Subpass has at least one color resolve attachment */
4375    bool                                         has_color_resolve;
4376 };
4377 
4378 static inline unsigned
anv_subpass_view_count(const struct anv_subpass * subpass)4379 anv_subpass_view_count(const struct anv_subpass *subpass)
4380 {
4381    return MAX2(1, util_bitcount(subpass->view_mask));
4382 }
4383 
4384 struct anv_render_pass_attachment {
4385    /* TODO: Consider using VkAttachmentDescription instead of storing each of
4386     * its members individually.
4387     */
4388    VkFormat                                     format;
4389    uint32_t                                     samples;
4390    VkImageUsageFlags                            usage;
4391    VkAttachmentLoadOp                           load_op;
4392    VkAttachmentStoreOp                          store_op;
4393    VkAttachmentLoadOp                           stencil_load_op;
4394    VkImageLayout                                initial_layout;
4395    VkImageLayout                                final_layout;
4396    VkImageLayout                                first_subpass_layout;
4397 
4398    VkImageLayout                                stencil_initial_layout;
4399    VkImageLayout                                stencil_final_layout;
4400 
4401    /* The subpass id in which the attachment will be used last. */
4402    uint32_t                                     last_subpass_idx;
4403 };
4404 
4405 struct anv_render_pass {
4406    struct vk_object_base                        base;
4407 
4408    uint32_t                                     attachment_count;
4409    uint32_t                                     subpass_count;
4410    /* An array of subpass_count+1 flushes, one per subpass boundary */
4411    enum anv_pipe_bits *                         subpass_flushes;
4412    struct anv_render_pass_attachment *          attachments;
4413    struct anv_subpass                           subpasses[0];
4414 };
4415 
4416 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
4417 
4418 #define OA_SNAPSHOT_SIZE (256)
4419 #define ANV_KHR_PERF_QUERY_SIZE (ALIGN(sizeof(uint64_t), 64) + 2 * OA_SNAPSHOT_SIZE)
4420 
4421 struct anv_query_pool {
4422    struct vk_object_base                        base;
4423 
4424    VkQueryType                                  type;
4425    VkQueryPipelineStatisticFlags                pipeline_statistics;
4426    /** Stride between slots, in bytes */
4427    uint32_t                                     stride;
4428    /** Number of slots in this query pool */
4429    uint32_t                                     slots;
4430    struct anv_bo *                              bo;
4431 
4432    /* Perf queries : */
4433    struct anv_bo                                reset_bo;
4434    uint32_t                                     n_counters;
4435    struct gen_perf_counter_pass                *counter_pass;
4436    uint32_t                                     n_passes;
4437    struct gen_perf_query_info                 **pass_query;
4438 };
4439 
khr_perf_query_preamble_offset(struct anv_query_pool * pool,uint32_t pass)4440 static inline uint32_t khr_perf_query_preamble_offset(struct anv_query_pool *pool,
4441                                                       uint32_t pass)
4442 {
4443    return pass * ANV_KHR_PERF_QUERY_SIZE + 8;
4444 }
4445 
4446 int anv_get_instance_entrypoint_index(const char *name);
4447 int anv_get_device_entrypoint_index(const char *name);
4448 int anv_get_physical_device_entrypoint_index(const char *name);
4449 
4450 const char *anv_get_instance_entry_name(int index);
4451 const char *anv_get_physical_device_entry_name(int index);
4452 const char *anv_get_device_entry_name(int index);
4453 
4454 bool
4455 anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
4456                                    const struct anv_instance_extension_table *instance);
4457 bool
4458 anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
4459                                           const struct anv_instance_extension_table *instance);
4460 bool
4461 anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
4462                                  const struct anv_instance_extension_table *instance,
4463                                  const struct anv_device_extension_table *device);
4464 
4465 void *anv_resolve_device_entrypoint(const struct gen_device_info *devinfo,
4466                                     uint32_t index);
4467 void *anv_lookup_entrypoint(const struct gen_device_info *devinfo,
4468                             const char *name);
4469 
4470 void anv_dump_image_to_ppm(struct anv_device *device,
4471                            struct anv_image *image, unsigned miplevel,
4472                            unsigned array_layer, VkImageAspectFlagBits aspect,
4473                            const char *filename);
4474 
4475 enum anv_dump_action {
4476    ANV_DUMP_FRAMEBUFFERS_BIT = 0x1,
4477 };
4478 
4479 #ifdef DEBUG
4480 PUBLIC
4481 #endif
4482 void anv_dump_start(struct anv_device *device, enum anv_dump_action actions);
4483 #ifdef DEBUG
4484 PUBLIC
4485 #endif
4486 void anv_dump_finish(void);
4487 
4488 void anv_dump_add_attachments(struct anv_cmd_buffer *cmd_buffer);
4489 
4490 static inline uint32_t
anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)4491 anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
4492 {
4493    /* This function must be called from within a subpass. */
4494    assert(cmd_state->pass && cmd_state->subpass);
4495 
4496    const uint32_t subpass_id = cmd_state->subpass - cmd_state->pass->subpasses;
4497 
4498    /* The id of this subpass shouldn't exceed the number of subpasses in this
4499     * render pass minus 1.
4500     */
4501    assert(subpass_id < cmd_state->pass->subpass_count);
4502    return subpass_id;
4503 }
4504 
4505 struct anv_performance_configuration_intel {
4506    struct vk_object_base      base;
4507 
4508    struct gen_perf_registers *register_config;
4509 
4510    uint64_t                   config_id;
4511 };
4512 
4513 struct gen_perf_config *anv_get_perf(const struct gen_device_info *devinfo, int fd);
4514 void anv_device_perf_init(struct anv_device *device);
4515 void anv_perf_write_pass_results(struct gen_perf_config *perf,
4516                                  struct anv_query_pool *pool, uint32_t pass,
4517                                  const struct gen_perf_query_result *accumulated_results,
4518                                  union VkPerformanceCounterResultKHR *results);
4519 
4520 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
4521    VK_FROM_HANDLE(__anv_type, __name, __handle)
4522 
4523 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, base, VkCommandBuffer,
4524                        VK_OBJECT_TYPE_COMMAND_BUFFER)
4525 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
4526 VK_DEFINE_HANDLE_CASTS(anv_instance, base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
4527 VK_DEFINE_HANDLE_CASTS(anv_physical_device, base, VkPhysicalDevice,
4528                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
4529 VK_DEFINE_HANDLE_CASTS(anv_queue, base, VkQueue, VK_OBJECT_TYPE_QUEUE)
4530 
4531 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, base, VkCommandPool,
4532                                VK_OBJECT_TYPE_COMMAND_POOL)
4533 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, base, VkBuffer,
4534                                VK_OBJECT_TYPE_BUFFER)
4535 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
4536                                VK_OBJECT_TYPE_BUFFER_VIEW)
4537 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
4538                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
4539 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
4540                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
4541 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
4542                                VkDescriptorSetLayout,
4543                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
4544 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
4545                                VkDescriptorUpdateTemplate,
4546                                VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
4547 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
4548                                VK_OBJECT_TYPE_DEVICE_MEMORY)
4549 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
4550 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
4551 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, base, VkFramebuffer,
4552                                VK_OBJECT_TYPE_FRAMEBUFFER)
4553 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, base, VkImage, VK_OBJECT_TYPE_IMAGE)
4554 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, base, VkImageView,
4555                                VK_OBJECT_TYPE_IMAGE_VIEW);
4556 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, base, VkPipelineCache,
4557                                VK_OBJECT_TYPE_PIPELINE_CACHE)
4558 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
4559                                VK_OBJECT_TYPE_PIPELINE)
4560 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
4561                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
4562 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
4563                                VK_OBJECT_TYPE_QUERY_POOL)
4564 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, base, VkRenderPass,
4565                                VK_OBJECT_TYPE_RENDER_PASS)
4566 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
4567                                VK_OBJECT_TYPE_SAMPLER)
4568 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, base, VkSemaphore,
4569                                VK_OBJECT_TYPE_SEMAPHORE)
4570 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, base, VkShaderModule,
4571                                VK_OBJECT_TYPE_SHADER_MODULE)
4572 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
4573                                VkSamplerYcbcrConversion,
4574                                VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
4575 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
4576                                VkPerformanceConfigurationINTEL,
4577                                VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
4578 
4579 /* Gen-specific function declarations */
4580 #ifdef genX
4581 #  include "anv_genX.h"
4582 #else
4583 #  define genX(x) gen7_##x
4584 #  include "anv_genX.h"
4585 #  undef genX
4586 #  define genX(x) gen75_##x
4587 #  include "anv_genX.h"
4588 #  undef genX
4589 #  define genX(x) gen8_##x
4590 #  include "anv_genX.h"
4591 #  undef genX
4592 #  define genX(x) gen9_##x
4593 #  include "anv_genX.h"
4594 #  undef genX
4595 #  define genX(x) gen11_##x
4596 #  include "anv_genX.h"
4597 #  undef genX
4598 #  define genX(x) gen12_##x
4599 #  include "anv_genX.h"
4600 #  undef genX
4601 #endif
4602 
4603 #endif /* ANV_PRIVATE_H */
4604