1 /*
2  * Copyright © 2019 Google LLC
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "tu_private.h"
25 
26 #include "spirv/nir_spirv.h"
27 #include "util/mesa-sha1.h"
28 #include "nir/nir_xfb_info.h"
29 #include "nir/nir_vulkan.h"
30 #include "vk_util.h"
31 
32 #include "ir3/ir3_nir.h"
33 
34 nir_shader *
tu_spirv_to_nir(struct tu_device * dev,const VkPipelineShaderStageCreateInfo * stage_info,gl_shader_stage stage)35 tu_spirv_to_nir(struct tu_device *dev,
36                 const VkPipelineShaderStageCreateInfo *stage_info,
37                 gl_shader_stage stage)
38 {
39    /* TODO these are made-up */
40    const struct spirv_to_nir_options spirv_options = {
41       .frag_coord_is_sysval = true,
42 
43       .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
44       .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
45 
46       /* Accessed via stg/ldg */
47       .phys_ssbo_addr_format = nir_address_format_64bit_global,
48 
49       /* Accessed via the const register file */
50       .push_const_addr_format = nir_address_format_logical,
51 
52       /* Accessed via ldl/stl */
53       .shared_addr_format = nir_address_format_32bit_offset,
54 
55       /* Accessed via stg/ldg (not used with Vulkan?) */
56       .global_addr_format = nir_address_format_64bit_global,
57 
58       /* ViewID is a sysval in geometry stages and an input in the FS */
59       .view_index_is_input = stage == MESA_SHADER_FRAGMENT,
60       .caps = {
61          .transform_feedback = true,
62          .tessellation = true,
63          .draw_parameters = true,
64          .image_read_without_format = true,
65          .image_write_without_format = true,
66          .variable_pointers = true,
67          .stencil_export = true,
68          .multiview = true,
69          .shader_viewport_index_layer = true,
70          .geometry_streams = true,
71       },
72    };
73    const nir_shader_compiler_options *nir_options =
74       ir3_get_compiler_options(dev->compiler);
75 
76    /* convert VkSpecializationInfo */
77    const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo;
78    struct nir_spirv_specialization *spec = NULL;
79    uint32_t num_spec = 0;
80    if (spec_info && spec_info->mapEntryCount) {
81       spec = calloc(spec_info->mapEntryCount, sizeof(*spec));
82       if (!spec)
83          return NULL;
84 
85       for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
86          const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
87          const void *data = spec_info->pData + entry->offset;
88          assert(data + entry->size <= spec_info->pData + spec_info->dataSize);
89          spec[i].id = entry->constantID;
90          switch (entry->size) {
91          case 8:
92             spec[i].value.u64 = *(const uint64_t *)data;
93             break;
94          case 4:
95             spec[i].value.u32 = *(const uint32_t *)data;
96             break;
97          case 2:
98             spec[i].value.u16 = *(const uint16_t *)data;
99             break;
100          case 1:
101             spec[i].value.u8 = *(const uint8_t *)data;
102             break;
103          default:
104             assert(!"Invalid spec constant size");
105             break;
106          }
107          spec[i].defined_on_module = false;
108       }
109 
110       num_spec = spec_info->mapEntryCount;
111    }
112 
113    struct tu_shader_module *module =
114       tu_shader_module_from_handle(stage_info->module);
115    assert(module->code_size % 4 == 0);
116    nir_shader *nir =
117       spirv_to_nir(module->code, module->code_size / 4,
118                    spec, num_spec, stage, stage_info->pName,
119                    &spirv_options, nir_options);
120 
121    free(spec);
122 
123    assert(nir->info.stage == stage);
124    nir_validate_shader(nir, "after spirv_to_nir");
125 
126    if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) {
127       fprintf(stderr, "translated nir:\n");
128       nir_print_shader(nir, stderr);
129    }
130 
131    /* multi step inlining procedure */
132    NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
133    NIR_PASS_V(nir, nir_lower_returns);
134    NIR_PASS_V(nir, nir_inline_functions);
135    NIR_PASS_V(nir, nir_copy_prop);
136    NIR_PASS_V(nir, nir_opt_deref);
137    foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
138       if (!func->is_entrypoint)
139          exec_node_remove(&func->node);
140    }
141    assert(exec_list_length(&nir->functions) == 1);
142    NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);
143 
144    /* Split member structs.  We do this before lower_io_to_temporaries so that
145     * it doesn't lower system values to temporaries by accident.
146     */
147    NIR_PASS_V(nir, nir_split_var_copies);
148    NIR_PASS_V(nir, nir_split_per_member_structs);
149 
150    NIR_PASS_V(nir, nir_remove_dead_variables,
151               nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
152               NULL);
153 
154    NIR_PASS_V(nir, nir_propagate_invariant);
155 
156    NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
157 
158    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
159    NIR_PASS_V(nir, nir_split_var_copies);
160    NIR_PASS_V(nir, nir_lower_var_copies);
161 
162    NIR_PASS_V(nir, nir_opt_copy_prop_vars);
163    NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
164 
165    /* ir3 doesn't support indirect input/output */
166    /* TODO: We shouldn't perform this lowering pass on gl_TessLevelInner
167     * and gl_TessLevelOuter. Since the tess levels are actually stored in
168     * a global BO, they can be directly accessed via stg and ldg.
169     * nir_lower_indirect_derefs will instead generate a big if-ladder which
170     * isn't *incorrect* but is much less efficient. */
171    NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
172 
173    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
174 
175    NIR_PASS_V(nir, nir_lower_system_values);
176    NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
177 
178    NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
179 
180    NIR_PASS_V(nir, nir_lower_frexp);
181 
182    ir3_optimize_loop(nir);
183 
184    return nir;
185 }
186 
187 static void
lower_load_push_constant(nir_builder * b,nir_intrinsic_instr * instr,struct tu_shader * shader)188 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
189                          struct tu_shader *shader)
190 {
191    nir_intrinsic_instr *load =
192       nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
193    load->num_components = instr->num_components;
194    uint32_t base = nir_intrinsic_base(instr);
195    assert(base % 4 == 0);
196    assert(base >= shader->push_consts.lo * 16);
197    base -= shader->push_consts.lo * 16;
198    nir_intrinsic_set_base(load, base / 4);
199    load->src[0] =
200       nir_src_for_ssa(nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)));
201    nir_ssa_dest_init(&load->instr, &load->dest,
202                      load->num_components, instr->dest.ssa.bit_size,
203                      instr->dest.ssa.name);
204    nir_builder_instr_insert(b, &load->instr);
205    nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
206 
207    nir_instr_remove(&instr->instr);
208 }
209 
210 static void
lower_vulkan_resource_index(nir_builder * b,nir_intrinsic_instr * instr,struct tu_shader * shader,const struct tu_pipeline_layout * layout)211 lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
212                             struct tu_shader *shader,
213                             const struct tu_pipeline_layout *layout)
214 {
215    nir_ssa_def *vulkan_idx = instr->src[0].ssa;
216 
217    unsigned set = nir_intrinsic_desc_set(instr);
218    unsigned binding = nir_intrinsic_binding(instr);
219    struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
220    struct tu_descriptor_set_binding_layout *binding_layout =
221       &set_layout->binding[binding];
222    uint32_t base;
223 
224    shader->active_desc_sets |= 1u << set;
225 
226    switch (binding_layout->type) {
227    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
228    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
229       base = layout->set[set].dynamic_offset_start +
230          binding_layout->dynamic_offset_offset;
231       set = MAX_SETS;
232       break;
233    default:
234       base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
235       break;
236    }
237 
238    nir_ssa_def *def = nir_vec3(b, nir_imm_int(b, set),
239                                nir_iadd(b, nir_imm_int(b, base), vulkan_idx),
240                                nir_imm_int(b, 0));
241 
242    nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(def));
243    nir_instr_remove(&instr->instr);
244 }
245 
246 static void
lower_load_vulkan_descriptor(nir_intrinsic_instr * intrin)247 lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin)
248 {
249    /* Loading the descriptor happens as part of the load/store instruction so
250     * this is a no-op.
251     */
252    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, intrin->src[0]);
253    nir_instr_remove(&intrin->instr);
254 }
255 
256 static void
lower_ssbo_ubo_intrinsic(nir_builder * b,nir_intrinsic_instr * intrin)257 lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
258 {
259    const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
260 
261    /* The bindless base is part of the instruction, which means that part of
262     * the "pointer" has to be constant. We solve this in the same way the blob
263     * does, by generating a bunch of if-statements. In the usual case where
264     * the descriptor set is constant this will get optimized out.
265     */
266 
267    unsigned buffer_src;
268    if (intrin->intrinsic == nir_intrinsic_store_ssbo) {
269       /* This has the value first */
270       buffer_src = 1;
271    } else {
272       buffer_src = 0;
273    }
274 
275    nir_ssa_def *base_idx = nir_channel(b, intrin->src[buffer_src].ssa, 0);
276    nir_ssa_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1);
277 
278    nir_ssa_def *results[MAX_SETS + 1] = { NULL };
279 
280    for (unsigned i = 0; i < MAX_SETS + 1; i++) {
281       /* if (base_idx == i) { ... */
282       nir_if *nif = nir_push_if(b, nir_ieq_imm(b, base_idx, i));
283 
284       nir_intrinsic_instr *bindless =
285          nir_intrinsic_instr_create(b->shader,
286                                     nir_intrinsic_bindless_resource_ir3);
287       bindless->num_components = 0;
288       nir_ssa_dest_init(&bindless->instr, &bindless->dest,
289                         1, 32, NULL);
290       nir_intrinsic_set_desc_set(bindless, i);
291       bindless->src[0] = nir_src_for_ssa(descriptor_idx);
292       nir_builder_instr_insert(b, &bindless->instr);
293 
294       nir_intrinsic_instr *copy =
295          nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
296 
297       copy->num_components = intrin->num_components;
298 
299       for (unsigned src = 0; src < info->num_srcs; src++) {
300          if (src == buffer_src)
301             copy->src[src] = nir_src_for_ssa(&bindless->dest.ssa);
302          else
303             copy->src[src] = nir_src_for_ssa(intrin->src[src].ssa);
304       }
305 
306       for (unsigned idx = 0; idx < info->num_indices; idx++) {
307          copy->const_index[idx] = intrin->const_index[idx];
308       }
309 
310       if (info->has_dest) {
311          nir_ssa_dest_init(&copy->instr, &copy->dest,
312                            intrin->dest.ssa.num_components,
313                            intrin->dest.ssa.bit_size,
314                            intrin->dest.ssa.name);
315          results[i] = &copy->dest.ssa;
316       }
317 
318       nir_builder_instr_insert(b, &copy->instr);
319 
320       /* } else { ... */
321       nir_push_else(b, nif);
322    }
323 
324    nir_ssa_def *result =
325       nir_ssa_undef(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
326    for (int i = MAX_SETS; i >= 0; i--) {
327       nir_pop_if(b, NULL);
328       if (info->has_dest)
329          result = nir_if_phi(b, results[i], result);
330    }
331 
332    if (info->has_dest)
333       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(result));
334    nir_instr_remove(&intrin->instr);
335 }
336 
337 static nir_ssa_def *
build_bindless(nir_builder * b,nir_deref_instr * deref,bool is_sampler,struct tu_shader * shader,const struct tu_pipeline_layout * layout)338 build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
339                struct tu_shader *shader,
340                const struct tu_pipeline_layout *layout)
341 {
342    nir_variable *var = nir_deref_instr_get_variable(deref);
343 
344    unsigned set = var->data.descriptor_set;
345    unsigned binding = var->data.binding;
346    const struct tu_descriptor_set_binding_layout *bind_layout =
347       &layout->set[set].layout->binding[binding];
348 
349    /* input attachments use non bindless workaround */
350    if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
351       const struct glsl_type *glsl_type = glsl_without_array(var->type);
352       uint32_t idx = var->data.index * 2;
353 
354       b->shader->info.textures_used |=
355          ((1ull << (bind_layout->array_size * 2)) - 1) << (idx * 2);
356 
357       /* D24S8 workaround: stencil of D24S8 will be sampled as uint */
358       if (glsl_get_sampler_result_type(glsl_type) == GLSL_TYPE_UINT)
359          idx += 1;
360 
361       if (deref->deref_type == nir_deref_type_var)
362          return nir_imm_int(b, idx);
363 
364       nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
365       return nir_iadd(b, nir_imm_int(b, idx),
366                       nir_imul_imm(b, arr_index, 2));
367    }
368 
369    shader->active_desc_sets |= 1u << set;
370 
371    nir_ssa_def *desc_offset;
372    unsigned descriptor_stride;
373    unsigned offset = 0;
374    /* Samplers come second in combined image/sampler descriptors, see
375       * write_combined_image_sampler_descriptor().
376       */
377    if (is_sampler && bind_layout->type ==
378          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
379       offset = 1;
380    }
381    desc_offset =
382       nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
383                   offset);
384    descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);
385 
386    if (deref->deref_type != nir_deref_type_var) {
387       assert(deref->deref_type == nir_deref_type_array);
388 
389       nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
390       desc_offset = nir_iadd(b, desc_offset,
391                              nir_imul_imm(b, arr_index, descriptor_stride));
392    }
393 
394    nir_intrinsic_instr *bindless =
395       nir_intrinsic_instr_create(b->shader,
396                                  nir_intrinsic_bindless_resource_ir3);
397    bindless->num_components = 0;
398    nir_ssa_dest_init(&bindless->instr, &bindless->dest,
399                      1, 32, NULL);
400    nir_intrinsic_set_desc_set(bindless, set);
401    bindless->src[0] = nir_src_for_ssa(desc_offset);
402    nir_builder_instr_insert(b, &bindless->instr);
403 
404    return &bindless->dest.ssa;
405 }
406 
407 static void
lower_image_deref(nir_builder * b,nir_intrinsic_instr * instr,struct tu_shader * shader,const struct tu_pipeline_layout * layout)408 lower_image_deref(nir_builder *b,
409                   nir_intrinsic_instr *instr, struct tu_shader *shader,
410                   const struct tu_pipeline_layout *layout)
411 {
412    nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
413    nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
414    nir_rewrite_image_intrinsic(instr, bindless, true);
415 }
416 
417 static bool
lower_intrinsic(nir_builder * b,nir_intrinsic_instr * instr,struct tu_shader * shader,const struct tu_pipeline_layout * layout)418 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
419                 struct tu_shader *shader,
420                 const struct tu_pipeline_layout *layout)
421 {
422    switch (instr->intrinsic) {
423    case nir_intrinsic_load_push_constant:
424       lower_load_push_constant(b, instr, shader);
425       return true;
426 
427    case nir_intrinsic_load_vulkan_descriptor:
428       lower_load_vulkan_descriptor(instr);
429       return true;
430 
431    case nir_intrinsic_vulkan_resource_index:
432       lower_vulkan_resource_index(b, instr, shader, layout);
433       return true;
434 
435    case nir_intrinsic_load_ubo:
436    case nir_intrinsic_load_ssbo:
437    case nir_intrinsic_store_ssbo:
438    case nir_intrinsic_ssbo_atomic_add:
439    case nir_intrinsic_ssbo_atomic_imin:
440    case nir_intrinsic_ssbo_atomic_umin:
441    case nir_intrinsic_ssbo_atomic_imax:
442    case nir_intrinsic_ssbo_atomic_umax:
443    case nir_intrinsic_ssbo_atomic_and:
444    case nir_intrinsic_ssbo_atomic_or:
445    case nir_intrinsic_ssbo_atomic_xor:
446    case nir_intrinsic_ssbo_atomic_exchange:
447    case nir_intrinsic_ssbo_atomic_comp_swap:
448    case nir_intrinsic_ssbo_atomic_fadd:
449    case nir_intrinsic_ssbo_atomic_fmin:
450    case nir_intrinsic_ssbo_atomic_fmax:
451    case nir_intrinsic_ssbo_atomic_fcomp_swap:
452    case nir_intrinsic_get_ssbo_size:
453       lower_ssbo_ubo_intrinsic(b, instr);
454       return true;
455 
456    case nir_intrinsic_image_deref_load:
457    case nir_intrinsic_image_deref_store:
458    case nir_intrinsic_image_deref_atomic_add:
459    case nir_intrinsic_image_deref_atomic_imin:
460    case nir_intrinsic_image_deref_atomic_umin:
461    case nir_intrinsic_image_deref_atomic_imax:
462    case nir_intrinsic_image_deref_atomic_umax:
463    case nir_intrinsic_image_deref_atomic_and:
464    case nir_intrinsic_image_deref_atomic_or:
465    case nir_intrinsic_image_deref_atomic_xor:
466    case nir_intrinsic_image_deref_atomic_exchange:
467    case nir_intrinsic_image_deref_atomic_comp_swap:
468    case nir_intrinsic_image_deref_size:
469    case nir_intrinsic_image_deref_samples:
470       lower_image_deref(b, instr, shader, layout);
471       return true;
472 
473    default:
474       return false;
475    }
476 }
477 
478 static void
lower_tex_ycbcr(const struct tu_pipeline_layout * layout,nir_builder * builder,nir_tex_instr * tex)479 lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
480                 nir_builder *builder,
481                 nir_tex_instr *tex)
482 {
483    int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
484    assert(deref_src_idx >= 0);
485    nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
486 
487    nir_variable *var = nir_deref_instr_get_variable(deref);
488    const struct tu_descriptor_set_layout *set_layout =
489       layout->set[var->data.descriptor_set].layout;
490    const struct tu_descriptor_set_binding_layout *binding =
491       &set_layout->binding[var->data.binding];
492    const struct tu_sampler_ycbcr_conversion *ycbcr_samplers =
493       tu_immutable_ycbcr_samplers(set_layout, binding);
494 
495    if (!ycbcr_samplers)
496       return;
497 
498    /* For the following instructions, we don't apply any change */
499    if (tex->op == nir_texop_txs ||
500        tex->op == nir_texop_query_levels ||
501        tex->op == nir_texop_lod)
502       return;
503 
504    assert(tex->texture_index == 0);
505    unsigned array_index = 0;
506    if (deref->deref_type != nir_deref_type_var) {
507       assert(deref->deref_type == nir_deref_type_array);
508       if (!nir_src_is_const(deref->arr.index))
509          return;
510       array_index = nir_src_as_uint(deref->arr.index);
511       array_index = MIN2(array_index, binding->array_size - 1);
512    }
513    const struct tu_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
514 
515    if (ycbcr_sampler->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
516       return;
517 
518    builder->cursor = nir_after_instr(&tex->instr);
519 
520    uint8_t bits = vk_format_get_component_bits(ycbcr_sampler->format,
521                                                UTIL_FORMAT_COLORSPACE_RGB,
522                                                PIPE_SWIZZLE_X);
523    uint32_t bpcs[3] = {bits, bits, bits}; /* TODO: use right bpc for each channel ? */
524    nir_ssa_def *result = nir_convert_ycbcr_to_rgb(builder,
525                                                   ycbcr_sampler->ycbcr_model,
526                                                   ycbcr_sampler->ycbcr_range,
527                                                   &tex->dest.ssa,
528                                                   bpcs);
529    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
530                                   result->parent_instr);
531 
532    builder->cursor = nir_before_instr(&tex->instr);
533 }
534 
535 static bool
lower_tex(nir_builder * b,nir_tex_instr * tex,struct tu_shader * shader,const struct tu_pipeline_layout * layout)536 lower_tex(nir_builder *b, nir_tex_instr *tex,
537           struct tu_shader *shader, const struct tu_pipeline_layout *layout)
538 {
539    lower_tex_ycbcr(layout, b, tex);
540 
541    int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
542    if (sampler_src_idx >= 0) {
543       nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
544       nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout);
545       nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src,
546                             nir_src_for_ssa(bindless));
547       tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
548    }
549 
550    int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
551    if (tex_src_idx >= 0) {
552       nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
553       nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
554       nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
555                             nir_src_for_ssa(bindless));
556       tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
557 
558       /* for the input attachment case: */
559       if (bindless->parent_instr->type != nir_instr_type_intrinsic)
560          tex->src[tex_src_idx].src_type = nir_tex_src_texture_offset;
561    }
562 
563    return true;
564 }
565 
566 static bool
lower_impl(nir_function_impl * impl,struct tu_shader * shader,const struct tu_pipeline_layout * layout)567 lower_impl(nir_function_impl *impl, struct tu_shader *shader,
568             const struct tu_pipeline_layout *layout)
569 {
570    nir_builder b;
571    nir_builder_init(&b, impl);
572    bool progress = false;
573 
574    nir_foreach_block(block, impl) {
575       nir_foreach_instr_safe(instr, block) {
576          b.cursor = nir_before_instr(instr);
577          switch (instr->type) {
578          case nir_instr_type_tex:
579             progress |= lower_tex(&b, nir_instr_as_tex(instr), shader, layout);
580             break;
581          case nir_instr_type_intrinsic:
582             progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
583             break;
584          default:
585             break;
586          }
587       }
588    }
589 
590    if (progress)
591       nir_metadata_preserve(impl, nir_metadata_none);
592    else
593       nir_metadata_preserve(impl, nir_metadata_all);
594 
595    return progress;
596 }
597 
598 
599 /* Figure out the range of push constants that we're actually going to push to
600  * the shader, and tell the backend to reserve this range when pushing UBO
601  * constants.
602  */
603 
604 static void
gather_push_constants(nir_shader * shader,struct tu_shader * tu_shader)605 gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
606 {
607    uint32_t min = UINT32_MAX, max = 0;
608    nir_foreach_function(function, shader) {
609       if (!function->impl)
610          continue;
611 
612       nir_foreach_block(block, function->impl) {
613          nir_foreach_instr_safe(instr, block) {
614             if (instr->type != nir_instr_type_intrinsic)
615                continue;
616 
617             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
618             if (intrin->intrinsic != nir_intrinsic_load_push_constant)
619                continue;
620 
621             uint32_t base = nir_intrinsic_base(intrin);
622             uint32_t range = nir_intrinsic_range(intrin);
623             min = MIN2(min, base);
624             max = MAX2(max, base + range);
625             break;
626          }
627       }
628    }
629 
630    if (min >= max) {
631       tu_shader->push_consts.lo = 0;
632       tu_shader->push_consts.count = 0;
633       return;
634    }
635 
636    /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
637     * however there's an alignment requirement of 4 on OFFSET. Expand the
638     * range and change units accordingly.
639     */
640    tu_shader->push_consts.lo = (min / 16) / 4 * 4;
641    tu_shader->push_consts.count =
642       align(max, 16) / 16 - tu_shader->push_consts.lo;
643 }
644 
645 static bool
tu_lower_io(nir_shader * shader,struct tu_shader * tu_shader,const struct tu_pipeline_layout * layout)646 tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
647             const struct tu_pipeline_layout *layout)
648 {
649    bool progress = false;
650 
651    gather_push_constants(shader, tu_shader);
652 
653    nir_foreach_function(function, shader) {
654       if (function->impl)
655          progress |= lower_impl(function->impl, tu_shader, layout);
656    }
657 
658    /* Remove now-unused variables so that when we gather the shader info later
659     * they won't be counted.
660     */
661 
662    if (progress)
663       nir_opt_dce(shader);
664 
665    progress |=
666       nir_remove_dead_variables(shader,
667                                 nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo,
668                                 NULL);
669 
670    return progress;
671 }
672 
/* Size/alignment callback for shared-memory variables.
 *
 * Only scalars and vectors are accepted. Booleans take 4 bytes per
 * component, every other type its bit size in bytes; the alignment reported
 * is always 4.
 */
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   unsigned bytes_per_component;
   if (glsl_type_is_boolean(type))
      bytes_per_component = 4;
   else
      bytes_per_component = glsl_get_bit_size(type) / 8;

   *size = bytes_per_component * glsl_get_vector_elements(type);
   *align = 4;
}
684 
685 static void
tu_gather_xfb_info(nir_shader * nir,struct ir3_stream_output_info * info)686 tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
687 {
688    nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
689 
690    if (!xfb)
691       return;
692 
693    /* creating a map from VARYING_SLOT_* enums to consecutive index */
694    uint8_t num_outputs = 0;
695    uint64_t outputs_written = 0;
696    for (int i = 0; i < xfb->output_count; i++)
697       outputs_written |= BITFIELD64_BIT(xfb->outputs[i].location);
698 
699    uint8_t output_map[VARYING_SLOT_TESS_MAX];
700    memset(output_map, 0, sizeof(output_map));
701 
702    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
703       if (outputs_written & BITFIELD64_BIT(attr))
704          output_map[attr] = num_outputs++;
705    }
706 
707    assert(xfb->output_count < IR3_MAX_SO_OUTPUTS);
708    info->num_outputs = xfb->output_count;
709 
710    for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
711       info->stride[i] = xfb->buffers[i].stride / 4;
712       info->buffer_to_stream[i] = xfb->buffer_to_stream[i];
713    }
714 
715    info->streams_written = xfb->streams_written;
716 
717    for (int i = 0; i < xfb->output_count; i++) {
718       info->output[i].register_index = output_map[xfb->outputs[i].location];
719       info->output[i].start_component = xfb->outputs[i].component_offset;
720       info->output[i].num_components =
721                            util_bitcount(xfb->outputs[i].component_mask);
722       info->output[i].output_buffer  = xfb->outputs[i].buffer;
723       info->output[i].dst_offset = xfb->outputs[i].offset / 4;
724       info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
725    }
726 
727    ralloc_free(xfb);
728 }
729 
730 struct tu_shader *
tu_shader_create(struct tu_device * dev,nir_shader * nir,unsigned multiview_mask,struct tu_pipeline_layout * layout,const VkAllocationCallbacks * alloc)731 tu_shader_create(struct tu_device *dev,
732                  nir_shader *nir,
733                  unsigned multiview_mask,
734                  struct tu_pipeline_layout *layout,
735                  const VkAllocationCallbacks *alloc)
736 {
737    struct tu_shader *shader;
738 
739    shader = vk_zalloc2(
740       &dev->vk.alloc, alloc,
741       sizeof(*shader),
742       8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
743    if (!shader)
744       return NULL;
745 
746    /* Gather information for transform feedback.
747     * This should be called after nir_split_per_member_structs.
748     * Also needs to be called after nir_remove_dead_variables with varyings,
749     * so that we could align stream outputs correctly.
750     */
751    struct ir3_stream_output_info so_info = {};
752    if (nir->info.stage == MESA_SHADER_VERTEX ||
753          nir->info.stage == MESA_SHADER_TESS_EVAL ||
754          nir->info.stage == MESA_SHADER_GEOMETRY)
755       tu_gather_xfb_info(nir, &so_info);
756 
757    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
758       NIR_PASS_V(nir, nir_lower_input_attachments,
759                  &(nir_input_attachment_options) {
760                      .use_fragcoord_sysval = true,
761                      .use_layer_id_sysval = false,
762                      /* When using multiview rendering, we must use
763                       * gl_ViewIndex as the layer id to pass to the texture
764                       * sampling function. gl_Layer doesn't work when
765                       * multiview is enabled.
766                       */
767                      .use_view_id_for_layer = multiview_mask != 0,
768                  });
769    }
770 
771    if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) {
772       tu_nir_lower_multiview(nir, multiview_mask,
773                              &shader->multi_pos_output, dev);
774    }
775 
776    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
777               nir_address_format_32bit_offset);
778 
779    NIR_PASS_V(nir, nir_lower_explicit_io,
780               nir_var_mem_ubo | nir_var_mem_ssbo,
781               nir_address_format_vec2_index_32bit_offset);
782 
783    if (nir->info.stage == MESA_SHADER_COMPUTE) {
784       NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
785                  nir_var_mem_shared, shared_type_info);
786       NIR_PASS_V(nir, nir_lower_explicit_io,
787                  nir_var_mem_shared,
788                  nir_address_format_32bit_offset);
789    }
790 
791    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
792    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
793 
794    NIR_PASS_V(nir, tu_lower_io, shader, layout);
795 
796    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
797 
798    ir3_finalize_nir(dev->compiler, nir);
799 
800    shader->ir3_shader =
801       ir3_shader_from_nir(dev->compiler, nir,
802                           align(shader->push_consts.count, 4),
803                           &so_info);
804 
805    return shader;
806 }
807 
808 void
tu_shader_destroy(struct tu_device * dev,struct tu_shader * shader,const VkAllocationCallbacks * alloc)809 tu_shader_destroy(struct tu_device *dev,
810                   struct tu_shader *shader,
811                   const VkAllocationCallbacks *alloc)
812 {
813    ir3_shader_destroy(shader->ir3_shader);
814 
815    vk_free2(&dev->vk.alloc, alloc, shader);
816 }
817 
818 VkResult
tu_CreateShaderModule(VkDevice _device,const VkShaderModuleCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkShaderModule * pShaderModule)819 tu_CreateShaderModule(VkDevice _device,
820                       const VkShaderModuleCreateInfo *pCreateInfo,
821                       const VkAllocationCallbacks *pAllocator,
822                       VkShaderModule *pShaderModule)
823 {
824    TU_FROM_HANDLE(tu_device, device, _device);
825    struct tu_shader_module *module;
826 
827    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
828    assert(pCreateInfo->flags == 0);
829    assert(pCreateInfo->codeSize % 4 == 0);
830 
831    module = vk_object_alloc(&device->vk, pAllocator,
832                             sizeof(*module) + pCreateInfo->codeSize,
833                             VK_OBJECT_TYPE_SHADER_MODULE);
834    if (module == NULL)
835       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
836 
837    module->code_size = pCreateInfo->codeSize;
838    memcpy(module->code, pCreateInfo->pCode, pCreateInfo->codeSize);
839 
840    *pShaderModule = tu_shader_module_to_handle(module);
841 
842    return VK_SUCCESS;
843 }
844 
845 void
tu_DestroyShaderModule(VkDevice _device,VkShaderModule _module,const VkAllocationCallbacks * pAllocator)846 tu_DestroyShaderModule(VkDevice _device,
847                        VkShaderModule _module,
848                        const VkAllocationCallbacks *pAllocator)
849 {
850    TU_FROM_HANDLE(tu_device, device, _device);
851    TU_FROM_HANDLE(tu_shader_module, module, _module);
852 
853    if (!module)
854       return;
855 
856    vk_object_free(&device->vk, pAllocator, module);
857 }
858