/*
 * Copyright © 2019 Raspberry Pi
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef V3DV_PRIVATE_H
#define V3DV_PRIVATE_H

#include <stdio.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>
#include <vk_enum_to_str.h>

#include <xf86drm.h>

#ifdef HAVE_VALGRIND
#include <valgrind/valgrind.h>
#include <valgrind/memcheck.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif
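
/* Example (an illustrative sketch, not driver code): the VG() wrapper lets us
 * issue valgrind memcheck client requests without paying any cost in builds
 * without valgrind support, e.g. marking a freshly mapped BO as undefined so
 * reads of uninitialized data get reported ("bo" is a hypothetical variable):
 *
 *    VG(VALGRIND_MAKE_MEM_UNDEFINED(bo->map, bo->size));
 */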

#include "v3dv_limits.h"

#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"

#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"

#include "compiler/v3d_compiler.h"

#include "vk_debug_report.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/xmlconfig.h"
#include "u_atomic.h"

#include "v3dv_entrypoints.h"
#include "v3dv_extensions.h"
#include "v3dv_bo.h"

#include "drm-uapi/v3d_drm.h"

/* FIXME: hooks for the packet definition functions. */
static inline void
pack_emit_reloc(void *cl, const void *reloc) {}

#define __gen_user_data struct v3dv_cl
#define __gen_address_type struct v3dv_cl_reloc
#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
                                     (reloc)->offset)
#define __gen_emit_reloc cl_pack_emit_reloc
#define __gen_unpack_address(cl, s, e) __unpack_address(cl, s, e)
#include "v3dv_cl.h"
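
/* Example (illustrative): with the hooks above, a struct v3dv_cl_reloc of
 * { .bo = bo, .offset = 16 } where bo->offset is 0x100000 makes
 * __gen_address_offset() resolve to 0x100010, which is the value the
 * generated pack functions write into address fields.
 */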

#include "vk_alloc.h"
#include "simulator/v3d_simulator.h"


/* FIXME: pipe_box from Gallium. Needed for some v3d_tiling.c functions.
 * In the future we might want to drop that dependency, but for now it is
 * good enough.
 */
#include "util/u_box.h"
#include "wsi_common.h"

#include "broadcom/cle/v3dx_pack.h"

/* A non-fatal assert.  Useful for debugging. */
#ifdef DEBUG
#define v3dv_assert(x) ({ \
   if (unlikely(!(x))) \
      fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
})
#else
#define v3dv_assert(x)
#endif

#define perf_debug(...) do {                       \
   if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))       \
      fprintf(stderr, __VA_ARGS__);                \
} while (0)

#define for_each_bit(b, dword)                                               \
   for (uint32_t __dword = (dword);                                          \
        (b) = __builtin_ffs(__dword) - 1, __dword; __dword &= ~(1 << (b)))
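
/* Example (illustrative): iterating over the set bits of a dirty-state mask;
 * "b" receives the index of one set bit per iteration:
 *
 *    uint32_t b;
 *    for_each_bit(b, cmd_buffer->state.dirty) {
 *       switch (1 << b) { ... }
 *    }
 */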

#define typed_memcpy(dest, src, count) ({				\
			STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \
			memcpy((dest), (src), (count) * sizeof(*(src))); \
		})
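
/* Example (illustrative): copying clear values out of a
 * VkRenderPassBeginInfo with the compile-time element-size check above:
 *
 *    VkClearValue values[4];
 *    typed_memcpy(values, pRenderPassBegin->pClearValues, 4);
 */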

struct v3dv_instance;

#ifdef USE_V3D_SIMULATOR
#define using_v3d_simulator true
#else
#define using_v3d_simulator false
#endif

struct v3d_simulator_file;

struct v3dv_physical_device {
   VK_LOADER_DATA _loader_data;

   struct v3dv_instance *instance;

   struct v3dv_device_extension_table supported_extensions;
   struct v3dv_physical_device_dispatch_table dispatch;

   char *name;
   int32_t render_fd;
   int32_t display_fd;

   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];

   struct wsi_device wsi_device;

   VkPhysicalDeviceMemoryProperties memory;

   struct v3d_device_info devinfo;

   struct v3d_simulator_file *sim_file;

   const struct v3d_compiler *compiler;
   uint32_t next_program_id;

   struct {
      bool merge_jobs;
   } options;
};

VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);

void v3dv_meta_clear_init(struct v3dv_device *device);
void v3dv_meta_clear_finish(struct v3dv_device *device);

void v3dv_meta_blit_init(struct v3dv_device *device);
void v3dv_meta_blit_finish(struct v3dv_device *device);

struct v3dv_app_info {
   const char *app_name;
   uint32_t app_version;
   const char *engine_name;
   uint32_t engine_version;
   uint32_t api_version;
};

struct v3dv_instance {
   VK_LOADER_DATA _loader_data;

   VkAllocationCallbacks alloc;

   struct v3dv_app_info app_info;

   struct v3dv_instance_extension_table enabled_extensions;
   struct v3dv_instance_dispatch_table dispatch;
   struct v3dv_device_dispatch_table device_dispatch;

   int physicalDeviceCount;
   struct v3dv_physical_device physicalDevice;

   struct vk_debug_report_instance debug_report_callbacks;

   bool pipeline_cache_enabled;
   bool default_pipeline_cache_enabled;
};

/* Tracks wait threads spawned from a single vkQueueSubmit call */
struct v3dv_queue_submit_wait_info {
   struct list_head list_link;

   struct v3dv_device *device;

   /* List of wait threads spawned for any command buffers in a particular
    * call to vkQueueSubmit.
    */
   uint32_t wait_thread_count;
   struct {
      pthread_t thread;
      bool finished;
   } wait_threads[16];

   /* The master wait thread for the entire submit. This will wait for all
    * other threads in this submit to complete before processing signal
    * semaphores and fences.
    */
   pthread_t master_wait_thread;

   /* List of semaphores (and fence) to signal after all wait threads have
    * completed and all command buffer jobs in the submission have been sent
    * to the GPU.
    */
   uint32_t signal_semaphore_count;
   VkSemaphore *signal_semaphores;
   VkFence fence;
};

struct v3dv_queue {
   VK_LOADER_DATA _loader_data;

   struct v3dv_device *device;
   VkDeviceQueueCreateFlags flags;

   /* A list of active v3dv_queue_submit_wait_info */
   struct list_head submit_wait_list;

   /* A mutex to prevent concurrent access to the list of wait threads */
   mtx_t mutex;

   struct v3dv_job *noop_job;
};

#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))

struct v3dv_meta_color_clear_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   bool cached;
   uint64_t key;
};

struct v3dv_meta_depth_clear_pipeline {
   VkPipeline pipeline;
   uint64_t key;
};

struct v3dv_meta_blit_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
};

struct v3dv_pipeline_cache_stats {
   uint32_t miss;
   uint32_t hit;
   uint32_t count;
};

struct v3dv_pipeline_cache {
   VK_LOADER_DATA _loader_data;

   struct v3dv_device *device;
   mtx_t mutex;

   struct hash_table *nir_cache;
   struct v3dv_pipeline_cache_stats nir_stats;

   struct hash_table *variant_cache;
   struct v3dv_pipeline_cache_stats variant_stats;
};

struct v3dv_device {
   VK_LOADER_DATA _loader_data;

   VkAllocationCallbacks alloc;

   struct v3dv_instance *instance;

   struct v3dv_device_extension_table enabled_extensions;
   struct v3dv_device_dispatch_table dispatch;

   int32_t render_fd;
   int32_t display_fd;
   struct v3d_device_info devinfo;
   struct v3dv_queue queue;

   /* A sync object to track the last job submitted to the GPU. */
   uint32_t last_job_sync;

   /* A mutex to prevent concurrent access to last_job_sync from the queue */
   mtx_t mutex;

   /* Resources used for meta operations */
   struct {
      mtx_t mtx;
      struct {
         VkPipelineLayout playout;
         struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
      } color_clear;
      struct {
         VkPipelineLayout playout;
         struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
      } depth_clear;
      struct {
         VkDescriptorSetLayout dslayout;
         VkPipelineLayout playout;
         struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
      } blit;
   } meta;

   struct v3dv_bo_cache {
      /** List of struct v3dv_bo freed, by age. */
      struct list_head time_list;
      /** List of struct v3dv_bo freed, per size, by age. */
      struct list_head *size_list;
      uint32_t size_list_size;

      mtx_t lock;

      uint32_t cache_size;
      uint32_t cache_count;
      uint32_t max_cache_size;
   } bo_cache;

   uint32_t bo_size;
   uint32_t bo_count;

   struct v3dv_pipeline_cache default_pipeline_cache;

   VkPhysicalDeviceFeatures features;
};

struct v3dv_device_memory {
   struct v3dv_bo *bo;
   const VkMemoryType *type;
   bool has_bo_ownership;
   bool is_for_wsi;
};

#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
#define TEXTURE_DATA_FORMAT_NO     255

struct v3dv_format {
   bool supported;

   /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or V3D_OUTPUT_IMAGE_FORMAT_NO */
   uint8_t rt_type;

   /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
   uint8_t tex_type;

   /* Swizzle to apply to the RGBA shader output for storing to the tile
    * buffer, to the RGBA tile buffer to produce shader input (for
    * blending), and for turning the rgba8888 texture sampler return
    * value into shader rgba values.
    */
   uint8_t swizzle[4];

   /* Whether the return value is 16F/I/UI or 32F/I/UI. */
   uint8_t return_size;

   /* If the format supports (linear) filtering when texturing. */
   bool supports_filtering;
};

/**
 * Tiling mode enum used for v3d_resource.c, which maps directly to the Memory
 * Format field of render target and Z/Stencil config.
 */
enum v3d_tiling_mode {
   /* Untiled resources.  Not valid as texture inputs. */
   VC5_TILING_RASTER,

   /* Single line of u-tiles. */
   VC5_TILING_LINEARTILE,

   /* Departure from standard 4-UIF block column format. */
   VC5_TILING_UBLINEAR_1_COLUMN,

   /* Departure from standard 4-UIF block column format. */
   VC5_TILING_UBLINEAR_2_COLUMN,

   /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
    * split 2x2 into utiles.
    */
   VC5_TILING_UIF_NO_XOR,

   /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
    * split 2x2 into utiles.
    */
   VC5_TILING_UIF_XOR,
};

struct v3d_resource_slice {
   uint32_t offset;
   uint32_t stride;
   uint32_t padded_height;
   /* Size of a single pane of the slice.  For 3D textures, there will be
    * a number of panes equal to the minified, power-of-two-aligned
    * depth.
    */
   uint32_t size;
   uint8_t ub_pad;
   enum v3d_tiling_mode tiling;
   uint32_t padded_height_of_output_image_in_uif_blocks;
};

struct v3dv_image {
   VkImageType type;
   VkImageAspectFlags aspects;

   VkExtent3D extent;
   uint32_t levels;
   uint32_t array_size;
   uint32_t samples;
   VkImageUsageFlags usage;
   VkImageCreateFlags flags;
   VkImageTiling tiling;

   VkFormat vk_format;
   const struct v3dv_format *format;

   uint32_t cpp;

   uint64_t drm_format_mod;
   bool tiled;

   struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
   uint64_t size; /* Total size in bytes */
   uint32_t cube_map_stride;
   uint32_t alignment;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
};

VkImageViewType v3dv_image_type_to_view_type(VkImageType type);

struct v3dv_image_view {
   const struct v3dv_image *image;
   VkImageAspectFlags aspects;
   VkExtent3D extent;
   VkImageViewType type;

   VkFormat vk_format;
   const struct v3dv_format *format;
   bool swap_rb;
   uint32_t internal_bpp;
   uint32_t internal_type;

   uint32_t base_level;
   uint32_t max_level;
   uint32_t first_layer;
   uint32_t last_layer;
   uint32_t offset;

   /* Precomputed swizzles (composed from createinfo->components and the
    * format swizzle) to pass in to the shader key.
    *
    * This could also be included in the descriptor bo, but the shader state
    * packet doesn't need it on a bo, so we can just avoid a memory copy.
    */
   uint8_t swizzle[4];

   /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
    * during UpdateDescriptorSets.
    *
    * Empirical tests show that cube arrays need a different shader state
    * depending on whether they are used with a sampler or not, so for these
    * we generate two states and select the one to use based on the descriptor
    * type.
    */
   uint8_t texture_shader_state[2][cl_packet_length(TEXTURE_SHADER_STATE)];
};

uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);

struct v3dv_buffer {
   VkDeviceSize size;
   VkBufferUsageFlags usage;
   uint32_t alignment;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
};

struct v3dv_buffer_view {
   const struct v3dv_buffer *buffer;

   VkFormat vk_format;
   const struct v3dv_format *format;
   uint32_t internal_bpp;
   uint32_t internal_type;

   uint32_t offset;
   uint32_t size;
   uint32_t num_elements;

   /* Prepacked TEXTURE_SHADER_STATE. */
   uint8_t texture_shader_state[cl_packet_length(TEXTURE_SHADER_STATE)];
};

struct v3dv_subpass_attachment {
   uint32_t attachment;
   VkImageLayout layout;
};

struct v3dv_subpass {
   uint32_t input_count;
   struct v3dv_subpass_attachment *input_attachments;

   uint32_t color_count;
   struct v3dv_subpass_attachment *color_attachments;
   struct v3dv_subpass_attachment *resolve_attachments;

   struct v3dv_subpass_attachment ds_attachment;

   bool has_srgb_rt;

   /* If we need to emit the clear of the depth/stencil attachment using a
    * draw call instead of using the TLB (GFXH-1461).
    */
   bool do_depth_clear_with_draw;
   bool do_stencil_clear_with_draw;
};

struct v3dv_render_pass_attachment {
   VkAttachmentDescription desc;
   uint32_t first_subpass;
   uint32_t last_subpass;

   /* If this is a multisampled attachment that is going to be resolved,
    * whether we can use the TLB resolve on store.
    */
   bool use_tlb_resolve;
};

struct v3dv_render_pass {
   uint32_t attachment_count;
   struct v3dv_render_pass_attachment *attachments;

   uint32_t subpass_count;
   struct v3dv_subpass *subpasses;

   struct v3dv_subpass_attachment *subpass_attachments;
};

struct v3dv_framebuffer {
   uint32_t width;
   uint32_t height;
   uint32_t layers;

   /* Typically, edge tiles in the framebuffer have padding depending on the
    * underlying tiling layout. One consequence of this is that when the
    * framebuffer dimensions are not aligned to tile boundaries, tile stores
    * would still write full tiles on the edges and write to the padded area.
    * If the framebuffer is aliasing a smaller region of a larger image, then
    * we need to be careful with this though, as we won't have padding on the
    * edge tiles (which typically means that we need to load the tile buffer
    * before we store).
    */
   bool has_edge_padding;

   uint32_t attachment_count;
   uint32_t color_attachment_count;
   struct v3dv_image_view *attachments[0];
};

struct v3dv_frame_tiling {
   uint32_t width;
   uint32_t height;
   uint32_t layers;
   uint32_t render_target_count;
   uint32_t internal_bpp;
   bool     msaa;
   uint32_t tile_width;
   uint32_t tile_height;
   uint32_t draw_tiles_x;
   uint32_t draw_tiles_y;
   uint32_t supertile_width;
   uint32_t supertile_height;
   uint32_t frame_width_in_supertiles;
   uint32_t frame_height_in_supertiles;
};

void v3dv_framebuffer_compute_internal_bpp_msaa(const struct v3dv_framebuffer *framebuffer,
                                                const struct v3dv_subpass *subpass,
                                                uint8_t *max_bpp, bool *msaa);

bool v3dv_subpass_area_is_tile_aligned(const VkRect2D *area,
                                       struct v3dv_framebuffer *fb,
                                       struct v3dv_render_pass *pass,
                                       uint32_t subpass_idx);
struct v3dv_cmd_pool {
   VkAllocationCallbacks alloc;
   struct list_head cmd_buffers;
};

enum v3dv_cmd_buffer_status {
   V3DV_CMD_BUFFER_STATUS_NEW           = 0,
   V3DV_CMD_BUFFER_STATUS_INITIALIZED   = 1,
   V3DV_CMD_BUFFER_STATUS_RECORDING     = 2,
   V3DV_CMD_BUFFER_STATUS_EXECUTABLE    = 3
};

union v3dv_clear_value {
   uint32_t color[4];
   struct {
      float z;
      uint8_t s;
   };
};

struct v3dv_cmd_buffer_attachment_state {
   /* The original clear value as provided by the Vulkan API */
   VkClearValue vk_clear_value;

   /* The hardware clear value */
   union v3dv_clear_value clear_value;
};

void v3dv_get_hw_clear_color(const VkClearColorValue *color,
                             uint32_t internal_type,
                             uint32_t internal_size,
                             uint32_t *hw_color);

struct v3dv_viewport_state {
   uint32_t count;
   VkViewport viewports[MAX_VIEWPORTS];
   float translate[MAX_VIEWPORTS][3];
   float scale[MAX_VIEWPORTS][3];
};

struct v3dv_scissor_state {
   uint32_t count;
   VkRect2D scissors[MAX_SCISSORS];
};

/* Mostly a v3dv mapping of VkDynamicState, used to track which state is
 * defined as dynamic.
 */
enum v3dv_dynamic_state_bits {
   V3DV_DYNAMIC_VIEWPORT                  = 1 << 0,
   V3DV_DYNAMIC_SCISSOR                   = 1 << 1,
   V3DV_DYNAMIC_STENCIL_COMPARE_MASK      = 1 << 2,
   V3DV_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 3,
   V3DV_DYNAMIC_STENCIL_REFERENCE         = 1 << 4,
   V3DV_DYNAMIC_BLEND_CONSTANTS           = 1 << 5,
   V3DV_DYNAMIC_DEPTH_BIAS                = 1 << 6,
   V3DV_DYNAMIC_LINE_WIDTH                = 1 << 7,
   V3DV_DYNAMIC_ALL                       = (1 << 8) - 1,
};

/* Flags for dirty pipeline state.
 */
enum v3dv_cmd_dirty_bits {
   V3DV_CMD_DIRTY_VIEWPORT                  = 1 << 0,
   V3DV_CMD_DIRTY_SCISSOR                   = 1 << 1,
   V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK      = 1 << 2,
   V3DV_CMD_DIRTY_STENCIL_WRITE_MASK        = 1 << 3,
   V3DV_CMD_DIRTY_STENCIL_REFERENCE         = 1 << 4,
   V3DV_CMD_DIRTY_PIPELINE                  = 1 << 5,
   V3DV_CMD_DIRTY_VERTEX_BUFFER             = 1 << 6,
   V3DV_CMD_DIRTY_INDEX_BUFFER              = 1 << 7,
   V3DV_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 8,
   V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS   = 1 << 9,
   V3DV_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 10,
   V3DV_CMD_DIRTY_BLEND_CONSTANTS           = 1 << 11,
   V3DV_CMD_DIRTY_OCCLUSION_QUERY           = 1 << 12,
   V3DV_CMD_DIRTY_DEPTH_BIAS                = 1 << 13,
   V3DV_CMD_DIRTY_LINE_WIDTH                = 1 << 14,
};

struct v3dv_dynamic_state {
   /**
    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint32_t mask;

   struct v3dv_viewport_state viewport;

   struct v3dv_scissor_state scissor;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   float blend_constants[4];

   struct {
      float constant_factor;
      float slope_factor;
   } depth_bias;

   float line_width;
};

extern const struct v3dv_dynamic_state default_dynamic_state;

void v3dv_viewport_compute_xform(const VkViewport *viewport,
                                 float scale[3],
                                 float translate[3]);

enum v3dv_ez_state {
   VC5_EZ_UNDECIDED = 0,
   VC5_EZ_GT_GE,
   VC5_EZ_LT_LE,
   VC5_EZ_DISABLED,
};

enum v3dv_job_type {
   V3DV_JOB_TYPE_GPU_CL = 0,
   V3DV_JOB_TYPE_GPU_CL_SECONDARY,
   V3DV_JOB_TYPE_GPU_TFU,
   V3DV_JOB_TYPE_GPU_CSD,
   V3DV_JOB_TYPE_CPU_RESET_QUERIES,
   V3DV_JOB_TYPE_CPU_END_QUERY,
   V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
   V3DV_JOB_TYPE_CPU_SET_EVENT,
   V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
   V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
   V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
   V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};

struct v3dv_reset_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
};

struct v3dv_end_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;
};

struct v3dv_copy_query_results_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
   struct v3dv_buffer *dst;
   uint32_t offset;
   uint32_t stride;
   VkQueryResultFlags flags;
};

struct v3dv_event_set_cpu_job_info {
   struct v3dv_event *event;
   int state;
};

struct v3dv_event_wait_cpu_job_info {
   /* List of events to wait on */
   uint32_t event_count;
   struct v3dv_event **events;

   /* Whether any postponed jobs after the wait should wait on semaphores */
   bool sem_wait;
};

struct v3dv_clear_attachments_cpu_job_info {
   uint32_t attachment_count;
   VkClearAttachment attachments[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */
   uint32_t rect_count;
   VkClearRect *rects;
};

struct v3dv_copy_buffer_to_image_cpu_job_info {
   struct v3dv_image *image;
   struct v3dv_buffer *buffer;
   uint32_t buffer_offset;
   uint32_t buffer_stride;
   uint32_t buffer_layer_stride;
   VkOffset3D image_offset;
   VkExtent3D image_extent;
   uint32_t mip_level;
   uint32_t base_layer;
   uint32_t layer_count;
};

struct v3dv_csd_indirect_cpu_job_info {
   struct v3dv_buffer *buffer;
   uint32_t offset;
   struct v3dv_job *csd_job;
   uint32_t wg_size;
   uint32_t *wg_uniform_offsets[3];
   bool needs_wg_uniform_rewrite;
};

struct v3dv_timestamp_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;
};

struct v3dv_job {
   struct list_head list_link;

   /* We only create job clones when executing secondary command buffers into
    * primaries. These clones don't make deep copies of the original object
    * so we want to flag them to avoid freeing resources they don't own.
    */
   bool is_clone;

   enum v3dv_job_type type;

   struct v3dv_device *device;

   struct v3dv_cmd_buffer *cmd_buffer;

   struct v3dv_cl bcl;
   struct v3dv_cl rcl;
   struct v3dv_cl indirect;

   /* Set of all BOs referenced by the job. This will be used for making
    * the list of BOs that the kernel will need to have paged in to
    * execute our job.
    */
   struct set *bos;
   uint32_t bo_count;

   struct v3dv_bo *tile_alloc;
   struct v3dv_bo *tile_state;

   bool tmu_dirty_rcl;

   uint32_t first_subpass;

   /* When the current subpass is split into multiple jobs, this flag is set
    * to true for any jobs after the first in the same subpass.
    */
   bool is_subpass_continue;

   /* If this job is the last job emitted for a subpass. */
   bool is_subpass_finish;

   struct v3dv_frame_tiling frame_tiling;

   enum v3dv_ez_state ez_state;
   enum v3dv_ez_state first_ez_state;

   /* Number of draw calls recorded into the job */
   uint32_t draw_count;

   /* A flag indicating whether we want to flush every draw separately. This
    * can be used for debugging, or for cases where special circumstances
    * require this behavior.
    */
   bool always_flush;

   /* Whether we need to serialize this job in our command stream */
   bool serialize;

   /* If this is a CL job, whether we should sync before binning */
   bool needs_bcl_sync;

   /* Job specs for CPU jobs */
   union {
      struct v3dv_reset_query_cpu_job_info          query_reset;
      struct v3dv_end_query_cpu_job_info            query_end;
      struct v3dv_copy_query_results_cpu_job_info   query_copy_results;
      struct v3dv_event_set_cpu_job_info            event_set;
      struct v3dv_event_wait_cpu_job_info           event_wait;
      struct v3dv_clear_attachments_cpu_job_info    clear_attachments;
      struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
      struct v3dv_csd_indirect_cpu_job_info         csd_indirect;
      struct v3dv_timestamp_query_cpu_job_info      query_timestamp;
   } cpu;

   /* Job specs for TFU jobs */
   struct drm_v3d_submit_tfu tfu;

   /* Job specs for CSD jobs */
   struct {
      struct v3dv_bo *shared_memory;
      uint32_t wg_count[3];
      struct drm_v3d_submit_csd submit;
   } csd;
};

void v3dv_job_init(struct v3dv_job *job,
                   enum v3dv_job_type type,
                   struct v3dv_device *device,
                   struct v3dv_cmd_buffer *cmd_buffer,
                   int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);
void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_emit_binning_flush(struct v3dv_job *job);
void v3dv_job_start_frame(struct v3dv_job *job,
                          uint32_t width,
                          uint32_t height,
                          uint32_t layers,
                          uint32_t render_target_count,
                          uint8_t max_internal_bpp,
                          bool msaa);
struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
                                                enum v3dv_job_type type,
                                                struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

struct v3dv_vertex_binding {
   struct v3dv_buffer *buffer;
   VkDeviceSize offset;
};

struct v3dv_descriptor_state {
   struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
   uint32_t valid;
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
};

struct v3dv_cmd_buffer_state {
   struct v3dv_render_pass *pass;
   struct v3dv_framebuffer *framebuffer;
   VkRect2D render_area;

   /* Current job being recorded */
   struct v3dv_job *job;

   uint32_t subpass_idx;

   struct v3dv_pipeline *pipeline;
   struct v3dv_descriptor_state descriptor_state[2];

   struct v3dv_dynamic_state dynamic;
   uint32_t dirty;

   /* Current clip window. We use this to check whether we have an active
    * scissor, since in that case we can't use TLB clears and need to fall
    * back to drawing rects.
    */
   VkRect2D clip_window;

   /* Whether our render area is aligned to tile boundaries. If this is false
    * then we have tiles that are only partially covered by the render area,
    * and therefore, we need to be careful with our loads and stores so we don't
    * modify pixels for the tile area that is not covered by the render area.
    * This means, for example, that we can't use the TLB to clear, since that
    * always clears full tiles.
    */
   bool tile_aligned_render_area;

   uint32_t attachment_alloc_count;
   struct v3dv_cmd_buffer_attachment_state *attachments;

   struct v3dv_vertex_binding vertex_bindings[MAX_VBS];

   struct {
      VkBuffer buffer;
      VkDeviceSize offset;
      uint8_t index_size;
   } index_buffer;

   /* Used to flag OOM conditions during command buffer recording */
   bool oom;

   /* Whether we have recorded a pipeline barrier that we still need to
    * process.
    */
   bool has_barrier;
   bool has_bcl_barrier;

   /* Secondary command buffer state */
   struct {
      bool occlusion_query_enable;
   } inheritance;

   /* Command buffer state saved during a meta operation */
   struct {
      uint32_t subpass_idx;
      VkRenderPass pass;
      VkPipeline pipeline;
      VkFramebuffer framebuffer;

      uint32_t attachment_alloc_count;
      uint32_t attachment_count;
      struct v3dv_cmd_buffer_attachment_state *attachments;

      bool tile_aligned_render_area;
      VkRect2D render_area;

      struct v3dv_dynamic_state dynamic;

      struct v3dv_descriptor_state descriptor_state;
      bool has_descriptor_state;

      uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   } meta;

   /* Command buffer state for queries */
   struct {
      /* A list of vkCmdEndQuery commands recorded in the command buffer during
       * a render pass. We queue these here and then schedule the corresponding
       * CPU jobs for them at the time we finish the GPU job in which they have
       * been recorded.
       */
      struct {
         uint32_t used_count;
         uint32_t alloc_count;
         struct v3dv_end_query_cpu_job_info *states;
      } end;

      /* This is not NULL if we have an active query, that is, we have called
       * vkCmdBeginQuery but not vkCmdEndQuery.
       */
      struct v3dv_bo *active_query;
   } query;
};

/* The following struct represents the info from a descriptor that we store in
 * host memory. They are mostly links to other existing vulkan objects, like
 * the image_view in order to access the swizzle info, or the buffer used for
 * a UBO/SSBO, for example.
 *
 * FIXME: revisit if it makes sense to just move everything that would be
 * needed from a descriptor to the bo.
 */
struct v3dv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         struct v3dv_image_view *image_view;
         struct v3dv_sampler *sampler;
      };

      struct {
         struct v3dv_buffer *buffer;
         uint32_t offset;
         uint32_t range;
      };

      struct v3dv_buffer_view *buffer_view;
   };
};

/* The following v3dv_xxx_descriptor structs represent descriptor info that we
 * upload to a bo, specifically a subregion of the descriptor pool bo.
 *
 * The general rule that we apply right now to decide which info goes to such
 * a bo is that we upload the data that is referenced by an address when
 * emitting a packet, and thus needs to be in a bo in any case.
 *
 * Note that these structs are mostly helpers that improve the semantics when
 * doing all that, but we could do as other mesa vulkan drivers and just
 * upload the info we know is expected based on the context.
 *
 * Also note that the sizes are aligned, as there is an alignment requirement
 * for addresses.
 */
struct v3dv_sampled_image_descriptor {
   uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)];
};

struct v3dv_sampler_descriptor {
   uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)];
};

struct v3dv_combined_image_sampler_descriptor {
   uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)];
   uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)];
};

/* Aux struct, as it is really common to have a bo/address pair. Called
 * resource because it is really likely that we will need something like this
 * if we work on reusing the same bo at different points (like the shader
 * assembly).
 */
struct v3dv_resource {
   struct v3dv_bo *bo;
   uint32_t offset;
};

struct v3dv_query {
   bool maybe_available;
   union {
      struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */
      uint64_t value; /* Used by CPU queries (timestamp) */
   };
};

struct v3dv_query_pool {
   VkQueryType query_type;
   uint32_t query_count;
   struct v3dv_query *queries;
};

VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
                                         struct v3dv_query_pool *pool,
                                         uint32_t first,
                                         uint32_t count,
                                         void *data,
                                         VkDeviceSize stride,
                                         VkQueryResultFlags flags);

typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
                                                       uint64_t pobj,
                                                       VkAllocationCallbacks *alloc);
struct v3dv_cmd_buffer_private_obj {
   struct list_head list_link;
   uint64_t obj;
   v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
};

struct v3dv_cmd_buffer {
   VK_LOADER_DATA _loader_data;

   struct v3dv_device *device;

   struct v3dv_cmd_pool *pool;
   struct list_head pool_link;

   /* Used at submit time to link command buffers in the submission that have
    * spawned wait threads, so we can then wait on all of them to complete
    * before we process any signal semaphores or fences.
    */
   struct list_head list_link;

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level;

   enum v3dv_cmd_buffer_status status;

   struct v3dv_cmd_buffer_state state;

   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
   struct v3dv_resource push_constants_resource;

   /* Collection of Vulkan objects created internally by the driver (typically
    * during recording of meta operations) that are part of the command buffer
    * and should be destroyed with it.
    */
   struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */

   /* Per-command buffer resources for meta operations. */
   struct {
      struct {
         /* The current descriptor pool for blit sources */
         VkDescriptorPool dspool;
      } blit;
   } meta;

   /* List of jobs in the command buffer. For primary command buffers it
    * represents the jobs we want to submit to the GPU. For secondary command
    * buffers it represents jobs that will be merged into a primary command
    * buffer via vkCmdExecuteCommands.
    */
   struct list_head jobs;
};

struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
                                           int32_t subpass_idx,
                                           enum v3dv_job_type type);
void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
                                               uint32_t subpass_idx);
struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);

void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
                                     bool push_descriptor_state);
void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
                                    uint32_t dirty_dynamic_state,
                                    bool needs_subpass_resume);

void v3dv_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer,
                                          int rt,
                                          uint32_t *rt_bpp,
                                          uint32_t *rt_type,
                                          uint32_t *rt_clamp);

void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
                                   struct v3dv_query_pool *pool,
                                   uint32_t first,
                                   uint32_t count);

void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct v3dv_query_pool *pool,
                                 uint32_t query,
                                 VkQueryControlFlags flags);

void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_query_pool *pool,
                               uint32_t query);

void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
                                        struct v3dv_query_pool *pool,
                                        uint32_t first,
                                        uint32_t count,
                                        struct v3dv_buffer *dst,
                                        uint32_t offset,
                                        uint32_t stride,
                                        VkQueryResultFlags flags);

void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct drm_v3d_submit_tfu *tfu);

void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
                                              const uint32_t *wg_counts);

void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                                     uint64_t obj,
                                     v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
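
/* Example (an illustrative sketch): tying a driver-created render pass to the
 * lifetime of the command buffer that uses it, so it is destroyed along with
 * the command buffer ("pass" is a hypothetical VkRenderPass handle):
 *
 *    v3dv_cmd_buffer_add_private_obj(
 *       cmd_buffer, (uintptr_t)pass,
 *       (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyRenderPass);
 */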

struct v3dv_semaphore {
   /* A syncobject handle associated with this semaphore */
   uint32_t sync;

   /* The file handle of a fence that we imported into our syncobject */
   int32_t fd;
};

struct v3dv_fence {
   /* A syncobject handle associated with this fence */
   uint32_t sync;

   /* The file handle of a fence that we imported into our syncobject */
   int32_t fd;
};

struct v3dv_event {
   int state;
};

struct v3dv_shader_module {
   /* A NIR shader. We create NIR modules for shaders that are generated
    * internally by the driver.
    */
   struct nir_shader *nir;

   /* A SPIR-V shader */
   unsigned char sha1[20];
   uint32_t size;
   char data[0];
};

/* FIXME: the same function exists in anv, radv and tu; perhaps move it to a
 * common place?
 */
static inline gl_shader_stage
vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
{
   assert(__builtin_popcount(vk_stage) == 1);
   return ffs(vk_stage) - 1;
}
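
/* For example, VK_SHADER_STAGE_FRAGMENT_BIT is 0x10, so ffs() returns 5 and
 * this yields 4, which is MESA_SHADER_FRAGMENT.
 */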

struct v3dv_shader_variant {
   uint32_t ref_cnt;

   gl_shader_stage stage;
   bool is_coord;

   /* v3d_key used to compile the variant. Sometimes we can just skip the
    * pipeline caches, and look it up using this.
    */
   union {
      struct v3d_key base;
      struct v3d_vs_key vs;
      struct v3d_fs_key fs;
   } key;
   uint32_t v3d_key_size;

   /* Key for the pipeline cache: the p_stage shader_sha1 plus the v3d
    * compiler sha1.
    */
   unsigned char variant_sha1[20];

   union {
      struct v3d_prog_data *base;
      struct v3d_vs_prog_data *vs;
      struct v3d_fs_prog_data *fs;
      struct v3d_compute_prog_data *cs;
   } prog_data;

   /* We explicitly save the prog_data_size as it makes it easier to
    * serialize.
    */
   uint32_t prog_data_size;
   /* FIXME: using one bo per shader. Eventually we would be interested in
    * reusing the same bo for all the shaders, like a bo per v3dv_pipeline for
    * shaders.
    */
   struct v3dv_bo *assembly_bo;
   uint32_t qpu_insts_size;
};

/*
 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
 * other methods don't have so many parameters.
 *
 * FIXME: for the case of the coordinate shader and the vertex shader, module,
 * entrypoint, spec_info and nir are the same. There is also info only
 * relevant to some stages. But it seemed too much of a hassle to create a new
 * struct only to handle that. Revisit if that kind of info starts to grow.
 */
struct v3dv_pipeline_stage {
   struct v3dv_pipeline *pipeline;

   gl_shader_stage stage;
   /* FIXME: is_coord only makes sense if stage == MESA_SHADER_VERTEX. Perhaps
    * a stage base/vs/fs as keys and prog_data?
    */
   bool is_coord;

   const struct v3dv_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   nir_shader *nir;

   /* The following is the combined hash of module+entrypoint+spec_info+nir */
   unsigned char shader_sha1[20];

   /** A name for this program, so you can track it in shader-db output. */
   uint32_t program_id;
   /** How many variants of this program were compiled, for shader-db. */
   uint32_t compiled_variant_count;

   /* The following is the default v3d_key populated from
    * VkGraphicsPipelineCreateInfo. Variants will be created by tweaking it,
    * so we don't need to maintain a copy of that create info struct around.
    */
   union {
      struct v3d_key base;
      struct v3d_vs_key vs;
      struct v3d_fs_key fs;
   } key;

   struct v3dv_shader_variant *current_variant;

   /* FIXME: this only makes sense for the vs, so perhaps a v3dv key like
    * radv? Or a kind of pipe_draw_info?
    */
   enum pipe_prim_type topology;
};

/* FIXME: although the full vpm_config is not required at this point, as we
 * don't plan to support GS initially, it is more readable and serves as a
 * placeholder to have the struct and fill it with default values.
 */
struct vpm_config {
   uint32_t As;
   uint32_t Vc;
   uint32_t Gs;
   uint32_t Gd;
   uint32_t Gv;
   uint32_t Ve;
   uint32_t gs_width;
};

/* We are using the descriptor pool entry for two things:
 * * Track the allocated sets, so we can properly free them if needed.
 * * Track the suballocated pool bo regions, so if some descriptor set is
 *   freed, the gap can be reallocated later.
 *
 * Those only make sense if the pool was not created with the flag
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
 */
struct v3dv_descriptor_pool_entry
{
   struct v3dv_descriptor_set *set;
   /* Offset and size of the subregion allocated for this entry from the
    * pool->bo
    */
   uint32_t offset;
   uint32_t size;
};

struct v3dv_descriptor_pool {
   struct v3dv_bo *bo;
   /* Current offset at the descriptor bo. 0 means that we didn't use it for
    * any descriptor. If the descriptor bo is NULL, the current offset is
    * meaningless.
    */
   uint32_t current_offset;

   /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the
    * descriptor sets are handled as a whole as pool memory and managed by the
    * following pointers. If set, they are not used, and descriptor sets are
    * allocated/freed individually.
    */
   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   uint32_t entry_count;
   uint32_t max_entry_count;
   struct v3dv_descriptor_pool_entry entries[0];
};

struct v3dv_descriptor_set {
   struct v3dv_descriptor_pool *pool;

   const struct v3dv_descriptor_set_layout *layout;

   /* Offset relative to the descriptor pool bo for this set */
   uint32_t base_offset;

   /* The descriptors below can be indexed (set/binding) using the set_layout
    */
   struct v3dv_descriptor descriptors[0];
};

struct v3dv_descriptor_set_binding_layout {
   VkDescriptorType type;

   /* Number of array elements in this binding */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   uint32_t dynamic_offset_count;
   uint32_t dynamic_offset_index;

   /* Offset into the descriptor set where this descriptor lives (the final
    * offset on the descriptor bo needs to take into account
    * set->base_offset)
    */
   uint32_t descriptor_offset;

   /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers.
    */
   uint32_t immutable_samplers_offset;
};

struct v3dv_descriptor_set_layout {
   VkDescriptorSetLayoutCreateFlags flags;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total bo size needed for this descriptor set
    */
   uint32_t bo_size;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of descriptors in this descriptor set */
   uint32_t descriptor_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* Bindings in this descriptor set */
   struct v3dv_descriptor_set_binding_layout binding[0];
};

struct v3dv_pipeline_layout {
   struct {
      struct v3dv_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;
   uint32_t dynamic_offset_count;

   uint32_t push_constant_size;
};

struct v3dv_descriptor_map {
   /* TODO: avoid fixed size array/justify the size */
   unsigned num_desc; /* Number of descriptors */
   int set[64];
   int binding[64];
   int array_index[64];
   int array_size[64];

   /* The following makes sense for textures, but this is the easiest place
    * to put it.
    */
   bool is_shadow[64];
};

struct v3dv_sampler {
   bool compare_enable;
   bool unnormalized_coordinates;
   bool clamp_to_transparent_black_border;

   /* Prepacked SAMPLER_STATE, referenced as part of the tmu configuration. If
    * needed, it will be copied to the descriptor info during
    * UpdateDescriptorSets.
    */
   uint8_t sampler_state[cl_packet_length(SAMPLER_STATE)];
};

#define V3DV_NO_SAMPLER_IDX 666

/*
 * The following two methods are used with the combined texture/sampler index
 * maps at v3dv_pipeline.
 */
static inline uint32_t
v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
                                        uint32_t sampler_index)
{
   return texture_index << 24 | sampler_index;
}

static inline void
v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
                                        uint32_t *texture_index,
                                        uint32_t *sampler_index)
{
   uint32_t texture = combined_index_key >> 24;
   uint32_t sampler = combined_index_key & 0xffffff;

   if (texture_index)
      *texture_index = texture;

   if (sampler_index)
      *sampler_index = sampler;
}
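
/* Round-trip example (illustrative): a key created from texture index 2 and
 * sampler index 1 packs as (2 << 24) | 1 == 0x02000001; unpacking it recovers
 * texture_index == 2 and sampler_index == 1.
 */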

struct v3dv_pipeline {
   struct v3dv_device *device;

   VkShaderStageFlags active_stages;

   struct v3dv_render_pass *pass;
   struct v3dv_subpass *subpass;

   /* Note: We can't use just a MESA_SHADER_STAGES array as we also need to
    * track the coordinate shader.
    */
   struct v3dv_pipeline_stage *vs;
   struct v3dv_pipeline_stage *vs_bin;
   struct v3dv_pipeline_stage *fs;
   struct v3dv_pipeline_stage *cs;

   /* Spilling memory requirements */
   struct {
      struct v3dv_bo *bo;
      uint32_t size_per_thread;
   } spill;

   struct v3dv_dynamic_state dynamic_state;

   struct v3dv_pipeline_layout *layout;

   enum v3dv_ez_state ez_state;

   bool msaa;
   bool sample_rate_shading;
   uint32_t sample_mask;

   bool primitive_restart;

   /* Accessed by binding, so vb[binding].stride is the stride of the vertex
    * array with that binding.
    */
   struct v3dv_pipeline_vertex_binding {
      uint32_t stride;
      uint32_t instance_divisor;
   } vb[MAX_VBS];
   uint32_t vb_count;

   /* Note that a lot of info from VkVertexInputAttributeDescription is
    * already prepacked, so here we are only storing those that we need to
    * recheck later. The array must be indexed by driver location, since that
    * is the order in which we need to emit the attributes.
    */
   struct v3dv_pipeline_vertex_attrib {
      uint32_t binding;
      uint32_t offset;
      VkFormat vk_format;
   } va[MAX_VERTEX_ATTRIBS];
   uint32_t va_count;

   struct v3dv_descriptor_map ubo_map;
   struct v3dv_descriptor_map ssbo_map;

   struct v3dv_descriptor_map sampler_map;
   struct v3dv_descriptor_map texture_map;

   /*
    * Vulkan has separate texture and sampler objects. The sampler and
    * texture maps above use a sampler and texture index respectively, and
    * those can be different. But OpenGL combines both (in other words, they
    * are the same). The v3d compiler and all the nir lowerings it uses were
    * written under that assumption. In order not to update all those, we
    * combine the indices, and we use the following maps to get one or the
    * other. In general the driver side uses the tex/sampler indices to gather
    * resources, and the compiler side uses the combined index (so the v3d key
    * texture info will be indexed using the combined index).
    */
   struct hash_table *combined_index_map;
   uint32_t combined_index_to_key_map[32];
   uint32_t next_combined_index;

   /* FIXME: this bo is another candidate for data to be uploaded using a
    * resource manager, instead of an individual bo.
    */
   struct v3dv_bo *default_attribute_values;

   struct vpm_config vpm_cfg;
   struct vpm_config vpm_cfg_bin;

   /* If the pipeline should emit any of the stencil configuration packets */
   bool emit_stencil_cfg[2];

   /* If the pipeline is using push constants */
   bool use_push_constants;

   /* Blend state */
   struct {
      /* Per-RT bit mask with blend enables */
      uint8_t enables;
      /* Per-RT prepacked blend config packets */
      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][cl_packet_length(BLEND_CFG)];
      /* Flag indicating whether the blend factors in use require
       * color constants.
       */
      bool needs_color_constants;
      /* Mask with enabled color channels for each RT (4 bits per RT) */
      uint32_t color_write_masks;
   } blend;

   /* Depth bias */
   struct {
      bool enabled;
      bool is_z16;
   } depth_bias;

   /* Packets prepacked during pipeline creation
    */
   uint8_t cfg_bits[cl_packet_length(CFG_BITS)];
   uint8_t shader_state_record[cl_packet_length(GL_SHADER_STATE_RECORD)];
   uint8_t vcm_cache_size[cl_packet_length(VCM_CACHE_SIZE)];
   uint8_t vertex_attrs[cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD) *
                        MAX_VERTEX_ATTRIBS];
   uint8_t stencil_cfg[2][cl_packet_length(STENCIL_CFG)];
};

static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
          !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
   return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
      VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}

const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);

static inline uint32_t
v3dv_zs_buffer_from_aspect_bits(VkImageAspectFlags aspects)
{
   const VkImageAspectFlags zs_aspects =
      VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
   const VkImageAspectFlags filtered_aspects = aspects & zs_aspects;

   if (filtered_aspects == zs_aspects)
      return ZSTENCIL;
   else if (filtered_aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
      return Z;
   else if (filtered_aspects == VK_IMAGE_ASPECT_STENCIL_BIT)
      return STENCIL;
   else
      return NONE;
}

static inline uint32_t
v3dv_zs_buffer_from_vk_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_D16_UNORM_S8_UINT:
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_D32_SFLOAT_S8_UINT:
      return ZSTENCIL;
   case VK_FORMAT_D16_UNORM:
   case VK_FORMAT_D32_SFLOAT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      return Z;
   case VK_FORMAT_S8_UINT:
      return STENCIL;
   default:
      return NONE;
   }
}

static inline uint32_t
v3dv_zs_buffer(bool depth, bool stencil)
{
   if (depth && stencil)
      return ZSTENCIL;
   else if (depth)
      return Z;
   else if (stencil)
      return STENCIL;
   return NONE;
}

static inline uint8_t
v3dv_get_internal_depth_type(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_D16_UNORM:
      return V3D_INTERNAL_TYPE_DEPTH_16;
   case VK_FORMAT_D32_SFLOAT:
      return V3D_INTERNAL_TYPE_DEPTH_32F;
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      return V3D_INTERNAL_TYPE_DEPTH_24;
   default:
      unreachable("Invalid depth format");
      break;
   }
}

uint32_t v3dv_physical_device_api_version(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);

int v3dv_get_instance_entrypoint_index(const char *name);
int v3dv_get_device_entrypoint_index(const char *name);
int v3dv_get_physical_device_entrypoint_index(const char *name);

const char *v3dv_get_instance_entry_name(int index);
const char *v3dv_get_physical_device_entry_name(int index);
const char *v3dv_get_device_entry_name(int index);

bool
v3dv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
                                    const struct v3dv_instance_extension_table *instance);
bool
v3dv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
                                           const struct v3dv_instance_extension_table *instance);
bool
v3dv_device_entrypoint_is_enabled(int index, uint32_t core_version,
                                  const struct v3dv_instance_extension_table *instance,
                                  const struct v3dv_device_extension_table *device);

void *v3dv_lookup_entrypoint(const struct v3d_device_info *devinfo,
                             const char *name);

VkResult __vk_errorf(struct v3dv_instance *instance, VkResult error,
                     const char *file, int line,
                     const char *format, ...);

#define vk_error(instance, error) __vk_errorf(instance, error, __FILE__, __LINE__, NULL)
#define vk_errorf(instance, error, format, ...) __vk_errorf(instance, error, __FILE__, __LINE__, format, ## __VA_ARGS__)

#ifdef DEBUG
#define v3dv_debug_ignored_stype(sType) \
   fprintf(stderr, "%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
#else
#define v3dv_debug_ignored_stype(sType)
#endif

const struct v3dv_format *v3dv_get_format(VkFormat);
const uint8_t *v3dv_get_format_swizzle(VkFormat f);
void v3dv_get_internal_type_bpp_for_output_format(uint32_t format, uint32_t *type, uint32_t *bpp);
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
bool v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo,
                                  uint32_t tex_format);
bool v3dv_format_supports_tlb_resolve(const struct v3dv_format *format);

uint32_t v3d_utile_width(int cpp);
uint32_t v3d_utile_height(int cpp);

void v3d_load_tiled_image(void *dst, uint32_t dst_stride,
                          void *src, uint32_t src_stride,
                          enum v3d_tiling_mode tiling_format,
                          int cpp, uint32_t image_h,
                          const struct pipe_box *box);

void v3d_store_tiled_image(void *dst, uint32_t dst_stride,
                           void *src, uint32_t src_stride,
                           enum v3d_tiling_mode tiling_format,
                           int cpp, uint32_t image_h,
                           const struct pipe_box *box);

struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                                         struct v3dv_pipeline_stage *p_stage);
struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                                                    struct v3dv_pipeline_stage *p_stage,
                                                    uint32_t **wg_count_offsets);

void v3d_key_update_return_size(struct v3dv_pipeline *pipeline,
                                struct v3d_key *key,
                                uint32_t return_size);

struct v3dv_shader_variant *
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
                        struct v3dv_pipeline_cache *cache,
                        struct v3d_key *key,
                        size_t key_size,
                        const VkAllocationCallbacks *pAllocator,
                        VkResult *out_vk_result);

struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           gl_shader_stage stage,
                           bool is_coord,
                           const unsigned char *variant_sha1,
                           const struct v3d_key *key,
                           uint32_t key_size,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           const uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result);

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant);

static inline void
v3dv_shader_variant_ref(struct v3dv_shader_variant *variant)
{
   assert(variant && variant->ref_cnt >= 1);
   p_atomic_inc(&variant->ref_cnt);
}

static inline void
v3dv_shader_variant_unref(struct v3dv_device *device,
                          struct v3dv_shader_variant *variant)
{
   assert(variant && variant->ref_cnt >= 1);
   if (p_atomic_dec_zero(&variant->ref_cnt))
      v3dv_shader_variant_destroy(device, variant);
}
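
/* Reference-counting sketch (illustrative only; assumes a variant is
 * created with ref_cnt == 1, which the asserts above imply):
 *
 *    v3dv_shader_variant_ref(variant);           // now shared: ref_cnt == 2
 *    v3dv_shader_variant_unref(device, variant); // back to ref_cnt == 1
 *    v3dv_shader_variant_unref(device, variant); // last ref: destroyed
 */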

struct v3dv_descriptor *
v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index,
                                   uint32_t *dynamic_offset);

const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
                                struct v3dv_descriptor_map *map,
                                struct v3dv_pipeline_layout *pipeline_layout,
                                uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state,
                                             struct v3dv_descriptor_map *map,
                                             struct v3dv_pipeline_layout *pipeline_layout,
                                             uint32_t index);

const struct v3dv_format *
v3dv_descriptor_map_get_texture_format(struct v3dv_descriptor_state *descriptor_state,
                                       struct v3dv_descriptor_map *map,
                                       struct v3dv_pipeline_layout *pipeline_layout,
                                       uint32_t index,
                                       VkFormat *out_vk_format);

struct v3dv_bo *
v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index);

static inline const struct v3dv_sampler *
v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
{
   assert(binding->immutable_samplers_offset);
   return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
}

void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                              struct v3dv_device *device,
                              bool cache_enabled);

void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);

void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache,
                                    nir_shader *nir,
                                    unsigned char sha1_key[20]);

nir_shader *v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                               struct v3dv_pipeline_cache *cache,
                                               const nir_shader_compiler_options *nir_options,
                                               unsigned char sha1_key[20]);

struct v3dv_shader_variant *
v3dv_pipeline_cache_search_for_variant(struct v3dv_pipeline *pipeline,
                                       struct v3dv_pipeline_cache *cache,
                                       unsigned char sha1_key[20]);

void
v3dv_pipeline_cache_upload_variant(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   struct v3dv_shader_variant *variant);

void v3dv_shader_module_internal_init(struct v3dv_shader_module *module,
                                      nir_shader *nir);

#define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType)            \
                                                                   \
   static inline struct __v3dv_type *                              \
   __v3dv_type ## _from_handle(__VkType _handle)                   \
   {                                                               \
      return (struct __v3dv_type *) _handle;                       \
   }                                                               \
                                                                   \
   static inline __VkType                                          \
   __v3dv_type ## _to_handle(struct __v3dv_type *_obj)             \
   {                                                               \
      return (__VkType) _obj;                                      \
   }

#define V3DV_DEFINE_NONDISP_HANDLE_CASTS(__v3dv_type, __VkType)    \
                                                                   \
   static inline struct __v3dv_type *                              \
   __v3dv_type ## _from_handle(__VkType _handle)                   \
   {                                                               \
      return (struct __v3dv_type *)(uintptr_t) _handle;            \
   }                                                               \
                                                                   \
   static inline __VkType                                          \
   __v3dv_type ## _to_handle(struct __v3dv_type *_obj)             \
   {                                                               \
      return (__VkType)(uintptr_t) _obj;                           \
   }

#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
   struct __v3dv_type *__name = __v3dv_type ## _from_handle(__handle)
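
/* Typical usage in a Vulkan entrypoint (an illustrative sketch; the
 * entrypoint shown is hypothetical, but this is the intended pattern):
 *
 *    VKAPI_ATTR void VKAPI_CALL
 *    v3dv_DestroySomething(VkDevice _device, VkSampler _sampler,
 *                          const VkAllocationCallbacks *pAllocator)
 *    {
 *       V3DV_FROM_HANDLE(v3dv_device, device, _device);
 *       V3DV_FROM_HANDLE(v3dv_sampler, sampler, _sampler);
 *       // ...use device and sampler as driver structs...
 *    }
 *
 * Dispatchable handles (VkDevice, VkQueue, VkCommandBuffer) are pointers
 * and cast directly; non-dispatchable handles are 64-bit integers, hence
 * the extra uintptr_t casts in the NONDISP variants.
 */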

V3DV_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, VkCommandBuffer)
V3DV_DEFINE_HANDLE_CASTS(v3dv_device, VkDevice)
V3DV_DEFINE_HANDLE_CASTS(v3dv_instance, VkInstance)
V3DV_DEFINE_HANDLE_CASTS(v3dv_physical_device, VkPhysicalDevice)
V3DV_DEFINE_HANDLE_CASTS(v3dv_queue, VkQueue)

V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, VkCommandPool)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, VkBuffer)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, VkBufferView)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, VkDeviceMemory)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, VkDescriptorPool)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, VkDescriptorSet)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, VkDescriptorSetLayout)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, VkEvent)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, VkFence)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, VkFramebuffer)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, VkImage)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, VkImageView)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, VkPipeline)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, VkPipelineCache)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, VkPipelineLayout)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, VkQueryPool)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, VkRenderPass)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, VkSampler)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, VkSemaphore)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_shader_module, VkShaderModule)

/* This is defined as a macro so that it works for both
 * VkImageSubresourceRange and VkImageSubresourceLayers.
 */
#define v3dv_layer_count(_image, _range) \
   ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
    (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)

#define v3dv_level_count(_image, _range) \
   ((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? \
    (_image)->levels - (_range)->baseMipLevel : (_range)->levelCount)
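
/* A worked example of the remaining-layers/levels resolution (the image
 * field values here are hypothetical):
 *
 *    // Assume image->array_size == 6 and image->levels == 4.
 *    VkImageSubresourceRange range = {
 *       .baseMipLevel = 1,
 *       .levelCount = VK_REMAINING_MIP_LEVELS,
 *       .baseArrayLayer = 2,
 *       .layerCount = VK_REMAINING_ARRAY_LAYERS,
 *    };
 *    uint32_t layers = v3dv_layer_count(image, &range); // 6 - 2 == 4
 *    uint32_t levels = v3dv_level_count(image, &range); // 4 - 1 == 3
 */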

/* Dispatches a DRM ioctl to the v3d simulator when running on it, and to
 * the real kernel driver otherwise.
 */
static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{
   if (using_v3d_simulator)
      return v3d_simulator_ioctl(fd, request, arg);
   else
      return drmIoctl(fd, request, arg);
}

/* Flags an out-of-memory condition in the command buffer state.
 *
 * Note that no-op jobs don't have a command buffer reference, which is why
 * this helper accepts either a command buffer or a job.
 */
static inline void
v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
{
   if (cmd_buffer) {
      cmd_buffer->state.oom = true;
   } else {
      assert(job);
      if (job->cmd_buffer)
         job->cmd_buffer->state.oom = true;
   }
}

#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while(0)
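
/* Typical usage (an illustrative sketch; emit_state is a hypothetical
 * helper): command recording code bails out early when a previous
 * allocation failure has already flagged the command buffer:
 *
 *    static void
 *    emit_state(struct v3dv_cmd_buffer *cmd_buffer)
 *    {
 *       v3dv_return_if_oom(cmd_buffer, NULL);
 *       // ...allocate and emit packets...
 *    }
 */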

static inline uint32_t
u64_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint64_t));
}

static inline bool
u64_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}
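
/* These are suitable callbacks for a hash table keyed by 64-bit values,
 * such as the pipeline's combined_index_map (a minimal sketch, using the
 * Mesa util hash table API):
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 */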

#endif /* V3DV_PRIVATE_H */