1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Connor Abbott (cwabbott0@gmail.com)
25  *    Jason Ekstrand (jason@jlekstrand.net)
26  *
27  */
28 
29 /*
30  * This lowering pass converts references to input/output variables with
31  * loads/stores to actual input/output intrinsics.
32  */
33 
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37 
38 #include "util/u_math.h"
39 
40 struct lower_io_state {
41    void *dead_ctx;
42    nir_builder builder;
43    int (*type_size)(const struct glsl_type *type, bool);
44    nir_variable_mode modes;
45    nir_lower_io_options options;
46 };
47 
48 static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)49 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
50 {
51    switch (deref_op) {
52 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
53    OP(atomic_exchange)
54    OP(atomic_comp_swap)
55    OP(atomic_add)
56    OP(atomic_imin)
57    OP(atomic_umin)
58    OP(atomic_imax)
59    OP(atomic_umax)
60    OP(atomic_and)
61    OP(atomic_or)
62    OP(atomic_xor)
63    OP(atomic_fadd)
64    OP(atomic_fmin)
65    OP(atomic_fmax)
66    OP(atomic_fcomp_swap)
67 #undef OP
68    default:
69       unreachable("Invalid SSBO atomic");
70    }
71 }
72 
73 static nir_intrinsic_op
global_atomic_for_deref(nir_intrinsic_op deref_op)74 global_atomic_for_deref(nir_intrinsic_op deref_op)
75 {
76    switch (deref_op) {
77 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
78    OP(atomic_exchange)
79    OP(atomic_comp_swap)
80    OP(atomic_add)
81    OP(atomic_imin)
82    OP(atomic_umin)
83    OP(atomic_imax)
84    OP(atomic_umax)
85    OP(atomic_and)
86    OP(atomic_or)
87    OP(atomic_xor)
88    OP(atomic_fadd)
89    OP(atomic_fmin)
90    OP(atomic_fmax)
91    OP(atomic_fcomp_swap)
92 #undef OP
93    default:
94       unreachable("Invalid SSBO atomic");
95    }
96 }
97 
98 static nir_intrinsic_op
shared_atomic_for_deref(nir_intrinsic_op deref_op)99 shared_atomic_for_deref(nir_intrinsic_op deref_op)
100 {
101    switch (deref_op) {
102 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
103    OP(atomic_exchange)
104    OP(atomic_comp_swap)
105    OP(atomic_add)
106    OP(atomic_imin)
107    OP(atomic_umin)
108    OP(atomic_imax)
109    OP(atomic_umax)
110    OP(atomic_and)
111    OP(atomic_or)
112    OP(atomic_xor)
113    OP(atomic_fadd)
114    OP(atomic_fmin)
115    OP(atomic_fmax)
116    OP(atomic_fcomp_swap)
117 #undef OP
118    default:
119       unreachable("Invalid shared atomic");
120    }
121 }
122 
123 void
nir_assign_var_locations(nir_shader * shader,nir_variable_mode mode,unsigned * size,int (* type_size)(const struct glsl_type *,bool))124 nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
125                          unsigned *size,
126                          int (*type_size)(const struct glsl_type *, bool))
127 {
128    unsigned location = 0;
129 
130    nir_foreach_variable_with_modes(var, shader, mode) {
131       var->data.driver_location = location;
132       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
133                                 var->data.mode == nir_var_shader_out ||
134                                 var->data.bindless;
135       location += type_size(var->type, bindless_type_size);
136    }
137 
138    *size = location;
139 }
140 
141 /**
142  * Return true if the given variable is a per-vertex input/output array.
143  * (such as geometry shader inputs).
144  */
145 bool
nir_is_per_vertex_io(const nir_variable * var,gl_shader_stage stage)146 nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
147 {
148    if (var->data.patch || !glsl_type_is_array(var->type))
149       return false;
150 
151    if (var->data.mode == nir_var_shader_in)
152       return stage == MESA_SHADER_GEOMETRY ||
153              stage == MESA_SHADER_TESS_CTRL ||
154              stage == MESA_SHADER_TESS_EVAL;
155 
156    if (var->data.mode == nir_var_shader_out)
157       return stage == MESA_SHADER_TESS_CTRL;
158 
159    return false;
160 }
161 
get_number_of_slots(struct lower_io_state * state,const nir_variable * var)162 static unsigned get_number_of_slots(struct lower_io_state *state,
163                                     const nir_variable *var)
164 {
165    const struct glsl_type *type = var->type;
166 
167    if (nir_is_per_vertex_io(var, state->builder.shader->info.stage)) {
168       assert(glsl_type_is_array(type));
169       type = glsl_get_array_element(type);
170    }
171 
172    return state->type_size(type, var->data.bindless);
173 }
174 
175 static nir_ssa_def *
get_io_offset(nir_builder * b,nir_deref_instr * deref,nir_ssa_def ** vertex_index,int (* type_size)(const struct glsl_type *,bool),unsigned * component,bool bts)176 get_io_offset(nir_builder *b, nir_deref_instr *deref,
177               nir_ssa_def **vertex_index,
178               int (*type_size)(const struct glsl_type *, bool),
179               unsigned *component, bool bts)
180 {
181    nir_deref_path path;
182    nir_deref_path_init(&path, deref, NULL);
183 
184    assert(path.path[0]->deref_type == nir_deref_type_var);
185    nir_deref_instr **p = &path.path[1];
186 
187    /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
188     * outermost array index separate.  Process the rest normally.
189     */
190    if (vertex_index != NULL) {
191       assert((*p)->deref_type == nir_deref_type_array);
192       *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
193       p++;
194    }
195 
196    if (path.path[0]->var->data.compact) {
197       assert((*p)->deref_type == nir_deref_type_array);
198       assert(glsl_type_is_scalar((*p)->type));
199 
200       /* We always lower indirect dereferences for "compact" array vars. */
201       const unsigned index = nir_src_as_uint((*p)->arr.index);
202       const unsigned total_offset = *component + index;
203       const unsigned slot_offset = total_offset / 4;
204       *component = total_offset % 4;
205       return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
206    }
207 
208    /* Just emit code and let constant-folding go to town */
209    nir_ssa_def *offset = nir_imm_int(b, 0);
210 
211    for (; *p; p++) {
212       if ((*p)->deref_type == nir_deref_type_array) {
213          unsigned size = type_size((*p)->type, bts);
214 
215          nir_ssa_def *mul =
216             nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
217 
218          offset = nir_iadd(b, offset, mul);
219       } else if ((*p)->deref_type == nir_deref_type_struct) {
220          /* p starts at path[1], so this is safe */
221          nir_deref_instr *parent = *(p - 1);
222 
223          unsigned field_offset = 0;
224          for (unsigned i = 0; i < (*p)->strct.index; i++) {
225             field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
226          }
227          offset = nir_iadd_imm(b, offset, field_offset);
228       } else {
229          unreachable("Unsupported deref type");
230       }
231    }
232 
233    nir_deref_path_finish(&path);
234 
235    return offset;
236 }
237 
238 static nir_ssa_def *
emit_load(struct lower_io_state * state,nir_ssa_def * vertex_index,nir_variable * var,nir_ssa_def * offset,unsigned component,unsigned num_components,unsigned bit_size,nir_alu_type dest_type)239 emit_load(struct lower_io_state *state,
240           nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
241           unsigned component, unsigned num_components, unsigned bit_size,
242           nir_alu_type dest_type)
243 {
244    nir_builder *b = &state->builder;
245    const nir_shader *nir = b->shader;
246    nir_variable_mode mode = var->data.mode;
247    nir_ssa_def *barycentric = NULL;
248 
249    nir_intrinsic_op op;
250    switch (mode) {
251    case nir_var_shader_in:
252       if (nir->info.stage == MESA_SHADER_FRAGMENT &&
253           nir->options->use_interpolated_input_intrinsics &&
254           var->data.interpolation != INTERP_MODE_FLAT) {
255          if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
256             assert(vertex_index != NULL);
257             op = nir_intrinsic_load_input_vertex;
258          } else {
259             assert(vertex_index == NULL);
260 
261             nir_intrinsic_op bary_op;
262             if (var->data.sample ||
263                 (state->options & nir_lower_io_force_sample_interpolation))
264                bary_op = nir_intrinsic_load_barycentric_sample;
265             else if (var->data.centroid)
266                bary_op = nir_intrinsic_load_barycentric_centroid;
267             else
268                bary_op = nir_intrinsic_load_barycentric_pixel;
269 
270             barycentric = nir_load_barycentric(&state->builder, bary_op,
271                                                var->data.interpolation);
272             op = nir_intrinsic_load_interpolated_input;
273          }
274       } else {
275          op = vertex_index ? nir_intrinsic_load_per_vertex_input :
276                              nir_intrinsic_load_input;
277       }
278       break;
279    case nir_var_shader_out:
280       op = vertex_index ? nir_intrinsic_load_per_vertex_output :
281                           nir_intrinsic_load_output;
282       break;
283    case nir_var_uniform:
284       op = nir_intrinsic_load_uniform;
285       break;
286    default:
287       unreachable("Unknown variable mode");
288    }
289 
290    nir_intrinsic_instr *load =
291       nir_intrinsic_instr_create(state->builder.shader, op);
292    load->num_components = num_components;
293 
294    nir_intrinsic_set_base(load, var->data.driver_location);
295    if (mode == nir_var_shader_in || mode == nir_var_shader_out)
296       nir_intrinsic_set_component(load, component);
297 
298    if (load->intrinsic == nir_intrinsic_load_uniform)
299       nir_intrinsic_set_range(load,
300                               state->type_size(var->type, var->data.bindless));
301 
302    if (load->intrinsic == nir_intrinsic_load_input ||
303        load->intrinsic == nir_intrinsic_load_input_vertex ||
304        load->intrinsic == nir_intrinsic_load_uniform)
305       nir_intrinsic_set_dest_type(load, dest_type);
306 
307    if (load->intrinsic != nir_intrinsic_load_uniform) {
308       nir_io_semantics semantics = {0};
309       semantics.location = var->data.location;
310       semantics.num_slots = get_number_of_slots(state, var);
311       semantics.fb_fetch_output = var->data.fb_fetch_output;
312       semantics.medium_precision =
313          var->data.precision == GLSL_PRECISION_MEDIUM ||
314          var->data.precision == GLSL_PRECISION_LOW;
315       nir_intrinsic_set_io_semantics(load, semantics);
316    }
317 
318    if (vertex_index) {
319       load->src[0] = nir_src_for_ssa(vertex_index);
320       load->src[1] = nir_src_for_ssa(offset);
321    } else if (barycentric) {
322       load->src[0] = nir_src_for_ssa(barycentric);
323       load->src[1] = nir_src_for_ssa(offset);
324    } else {
325       load->src[0] = nir_src_for_ssa(offset);
326    }
327 
328    nir_ssa_dest_init(&load->instr, &load->dest,
329                      num_components, bit_size, NULL);
330    nir_builder_instr_insert(b, &load->instr);
331 
332    return &load->dest.ssa;
333 }
334 
335 static nir_ssa_def *
lower_load(nir_intrinsic_instr * intrin,struct lower_io_state * state,nir_ssa_def * vertex_index,nir_variable * var,nir_ssa_def * offset,unsigned component,const struct glsl_type * type)336 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
337            nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
338            unsigned component, const struct glsl_type *type)
339 {
340    assert(intrin->dest.is_ssa);
341    if (intrin->dest.ssa.bit_size == 64 &&
342        (state->options & nir_lower_io_lower_64bit_to_32)) {
343       nir_builder *b = &state->builder;
344 
345       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
346 
347       nir_ssa_def *comp64[4];
348       assert(component == 0 || component == 2);
349       unsigned dest_comp = 0;
350       while (dest_comp < intrin->dest.ssa.num_components) {
351          const unsigned num_comps =
352             MIN2(intrin->dest.ssa.num_components - dest_comp,
353                  (4 - component) / 2);
354 
355          nir_ssa_def *data32 =
356             emit_load(state, vertex_index, var, offset, component,
357                       num_comps * 2, 32, nir_type_uint32);
358          for (unsigned i = 0; i < num_comps; i++) {
359             comp64[dest_comp + i] =
360                nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
361          }
362 
363          /* Only the first store has a component offset */
364          component = 0;
365          dest_comp += num_comps;
366          offset = nir_iadd_imm(b, offset, slot_size);
367       }
368 
369       return nir_vec(b, comp64, intrin->dest.ssa.num_components);
370    } else if (intrin->dest.ssa.bit_size == 1) {
371       /* Booleans are 32-bit */
372       assert(glsl_type_is_boolean(type));
373       return nir_b2b1(&state->builder,
374                       emit_load(state, vertex_index, var, offset, component,
375                                 intrin->dest.ssa.num_components, 32,
376                                 nir_type_bool32));
377    } else {
378       return emit_load(state, vertex_index, var, offset, component,
379                        intrin->dest.ssa.num_components,
380                        intrin->dest.ssa.bit_size,
381                        nir_get_nir_type_for_glsl_type(type));
382    }
383 }
384 
385 static void
emit_store(struct lower_io_state * state,nir_ssa_def * data,nir_ssa_def * vertex_index,nir_variable * var,nir_ssa_def * offset,unsigned component,unsigned num_components,nir_component_mask_t write_mask,nir_alu_type src_type)386 emit_store(struct lower_io_state *state, nir_ssa_def *data,
387            nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
388            unsigned component, unsigned num_components,
389            nir_component_mask_t write_mask, nir_alu_type src_type)
390 {
391    nir_builder *b = &state->builder;
392    nir_variable_mode mode = var->data.mode;
393 
394    assert(mode == nir_var_shader_out);
395    nir_intrinsic_op op;
396    op = vertex_index ? nir_intrinsic_store_per_vertex_output :
397                        nir_intrinsic_store_output;
398 
399    nir_intrinsic_instr *store =
400       nir_intrinsic_instr_create(state->builder.shader, op);
401    store->num_components = num_components;
402 
403    store->src[0] = nir_src_for_ssa(data);
404 
405    nir_intrinsic_set_base(store, var->data.driver_location);
406 
407    if (mode == nir_var_shader_out)
408       nir_intrinsic_set_component(store, component);
409 
410    if (store->intrinsic == nir_intrinsic_store_output)
411       nir_intrinsic_set_src_type(store, src_type);
412 
413    nir_intrinsic_set_write_mask(store, write_mask);
414 
415    if (vertex_index)
416       store->src[1] = nir_src_for_ssa(vertex_index);
417 
418    store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);
419 
420    unsigned gs_streams = 0;
421    if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
422       if (var->data.stream & NIR_STREAM_PACKED) {
423          gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
424       } else {
425          assert(var->data.stream < 4);
426          gs_streams = 0;
427          for (unsigned i = 0; i < num_components; ++i)
428             gs_streams |= var->data.stream << (2 * i);
429       }
430    }
431 
432    nir_io_semantics semantics = {0};
433    semantics.location = var->data.location;
434    semantics.num_slots = get_number_of_slots(state, var);
435    semantics.dual_source_blend_index = var->data.index;
436    semantics.gs_streams = gs_streams;
437    semantics.medium_precision =
438       var->data.precision == GLSL_PRECISION_MEDIUM ||
439       var->data.precision == GLSL_PRECISION_LOW;
440    semantics.per_view = var->data.per_view;
441    nir_intrinsic_set_io_semantics(store, semantics);
442 
443    nir_builder_instr_insert(b, &store->instr);
444 }
445 
446 static void
lower_store(nir_intrinsic_instr * intrin,struct lower_io_state * state,nir_ssa_def * vertex_index,nir_variable * var,nir_ssa_def * offset,unsigned component,const struct glsl_type * type)447 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
448             nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
449             unsigned component, const struct glsl_type *type)
450 {
451    assert(intrin->src[1].is_ssa);
452    if (intrin->src[1].ssa->bit_size == 64 &&
453        (state->options & nir_lower_io_lower_64bit_to_32)) {
454       nir_builder *b = &state->builder;
455 
456       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
457 
458       assert(component == 0 || component == 2);
459       unsigned src_comp = 0;
460       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
461       while (src_comp < intrin->num_components) {
462          const unsigned num_comps =
463             MIN2(intrin->num_components - src_comp,
464                  (4 - component) / 2);
465 
466          if (write_mask & BITFIELD_MASK(num_comps)) {
467             nir_ssa_def *data =
468                nir_channels(b, intrin->src[1].ssa,
469                             BITFIELD_RANGE(src_comp, num_comps));
470             nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
471 
472             nir_component_mask_t write_mask32 = 0;
473             for (unsigned i = 0; i < num_comps; i++) {
474                if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
475                   write_mask32 |= 3 << (i * 2);
476             }
477 
478             emit_store(state, data32, vertex_index, var, offset,
479                        component, data32->num_components, write_mask32,
480                        nir_type_uint32);
481          }
482 
483          /* Only the first store has a component offset */
484          component = 0;
485          src_comp += num_comps;
486          write_mask >>= num_comps;
487          offset = nir_iadd_imm(b, offset, slot_size);
488       }
489    } else if (intrin->dest.ssa.bit_size == 1) {
490       /* Booleans are 32-bit */
491       assert(glsl_type_is_boolean(type));
492       nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
493       emit_store(state, b32_val, vertex_index, var, offset,
494                  component, intrin->num_components,
495                  nir_intrinsic_write_mask(intrin),
496                  nir_type_bool32);
497    } else {
498       emit_store(state, intrin->src[1].ssa, vertex_index, var, offset,
499                  component, intrin->num_components,
500                  nir_intrinsic_write_mask(intrin),
501                  nir_get_nir_type_for_glsl_type(type));
502    }
503 }
504 
505 static nir_ssa_def *
lower_interpolate_at(nir_intrinsic_instr * intrin,struct lower_io_state * state,nir_variable * var,nir_ssa_def * offset,unsigned component,const struct glsl_type * type)506 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
507                      nir_variable *var, nir_ssa_def *offset, unsigned component,
508                      const struct glsl_type *type)
509 {
510    nir_builder *b = &state->builder;
511    assert(var->data.mode == nir_var_shader_in);
512 
513    /* Ignore interpolateAt() for flat variables - flat is flat. Lower
514     * interpolateAtVertex() for explicit variables.
515     */
516    if (var->data.interpolation == INTERP_MODE_FLAT ||
517        var->data.interpolation == INTERP_MODE_EXPLICIT) {
518       nir_ssa_def *vertex_index = NULL;
519 
520       if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
521          assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
522          vertex_index = intrin->src[1].ssa;
523       }
524 
525       return lower_load(intrin, state, vertex_index, var, offset, component, type);
526    }
527 
528    /* None of the supported APIs allow interpolation on 64-bit things */
529    assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);
530 
531    nir_intrinsic_op bary_op;
532    switch (intrin->intrinsic) {
533    case nir_intrinsic_interp_deref_at_centroid:
534       bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
535                 nir_intrinsic_load_barycentric_sample :
536                 nir_intrinsic_load_barycentric_centroid;
537       break;
538    case nir_intrinsic_interp_deref_at_sample:
539       bary_op = nir_intrinsic_load_barycentric_at_sample;
540       break;
541    case nir_intrinsic_interp_deref_at_offset:
542       bary_op = nir_intrinsic_load_barycentric_at_offset;
543       break;
544    default:
545       unreachable("Bogus interpolateAt() intrinsic.");
546    }
547 
548    nir_intrinsic_instr *bary_setup =
549       nir_intrinsic_instr_create(state->builder.shader, bary_op);
550 
551    nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
552    nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
553 
554    if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
555        intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
556        intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
557       nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);
558 
559    nir_builder_instr_insert(b, &bary_setup->instr);
560 
561    nir_intrinsic_instr *load =
562       nir_intrinsic_instr_create(state->builder.shader,
563                                  nir_intrinsic_load_interpolated_input);
564    load->num_components = intrin->num_components;
565 
566    nir_intrinsic_set_base(load, var->data.driver_location);
567    nir_intrinsic_set_component(load, component);
568 
569    nir_io_semantics semantics = {0};
570    semantics.location = var->data.location;
571    semantics.num_slots = get_number_of_slots(state, var);
572    semantics.medium_precision =
573       var->data.precision == GLSL_PRECISION_MEDIUM ||
574       var->data.precision == GLSL_PRECISION_LOW;
575    nir_intrinsic_set_io_semantics(load, semantics);
576 
577    load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
578    load->src[1] = nir_src_for_ssa(offset);
579 
580    assert(intrin->dest.is_ssa);
581    nir_ssa_dest_init(&load->instr, &load->dest,
582                      intrin->dest.ssa.num_components,
583                      intrin->dest.ssa.bit_size, NULL);
584    nir_builder_instr_insert(b, &load->instr);
585 
586    return &load->dest.ssa;
587 }
588 
589 static bool
nir_lower_io_block(nir_block * block,struct lower_io_state * state)590 nir_lower_io_block(nir_block *block,
591                    struct lower_io_state *state)
592 {
593    nir_builder *b = &state->builder;
594    const nir_shader_compiler_options *options = b->shader->options;
595    bool progress = false;
596 
597    nir_foreach_instr_safe(instr, block) {
598       if (instr->type != nir_instr_type_intrinsic)
599          continue;
600 
601       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
602 
603       switch (intrin->intrinsic) {
604       case nir_intrinsic_load_deref:
605       case nir_intrinsic_store_deref:
606          /* We can lower the io for this nir instrinsic */
607          break;
608       case nir_intrinsic_interp_deref_at_centroid:
609       case nir_intrinsic_interp_deref_at_sample:
610       case nir_intrinsic_interp_deref_at_offset:
611       case nir_intrinsic_interp_deref_at_vertex:
612          /* We can optionally lower these to load_interpolated_input */
613          if (options->use_interpolated_input_intrinsics ||
614              options->lower_interpolate_at)
615             break;
616       default:
617          /* We can't lower the io for this nir instrinsic, so skip it */
618          continue;
619       }
620 
621       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
622       if (!nir_deref_mode_is_one_of(deref, state->modes))
623          continue;
624 
625       nir_variable *var = nir_deref_instr_get_variable(deref);
626 
627       b->cursor = nir_before_instr(instr);
628 
629       const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);
630 
631       nir_ssa_def *offset;
632       nir_ssa_def *vertex_index = NULL;
633       unsigned component_offset = var->data.location_frac;
634       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
635                                 var->data.mode == nir_var_shader_out ||
636                                 var->data.bindless;
637 
638      if (nir_deref_instr_is_known_out_of_bounds(deref)) {
639         /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
640          *
641          *    In the subsections described above for array, vector, matrix and
642          *    structure accesses, any out-of-bounds access produced undefined
643          *    behavior....
644          *    Out-of-bounds reads return undefined values, which
645          *    include values from other variables of the active program or zero.
646          *    Out-of-bounds writes may be discarded or overwrite
647          *    other variables of the active program.
648          *
649          * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
650          * for reads.
651          *
652          * Otherwise get_io_offset would return out-of-bound offset which may
653          * result in out-of-bound loading/storing of inputs/outputs,
654          * that could cause issues in drivers down the line.
655          */
656          if (intrin->intrinsic != nir_intrinsic_store_deref) {
657             nir_ssa_def *zero =
658                nir_imm_zero(b, intrin->dest.ssa.num_components,
659                              intrin->dest.ssa.bit_size);
660             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
661                                   nir_src_for_ssa(zero));
662          }
663 
664          nir_instr_remove(&intrin->instr);
665          progress = true;
666          continue;
667       }
668 
669       offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
670                              state->type_size, &component_offset,
671                              bindless_type_size);
672 
673       nir_ssa_def *replacement = NULL;
674 
675       switch (intrin->intrinsic) {
676       case nir_intrinsic_load_deref:
677          replacement = lower_load(intrin, state, vertex_index, var, offset,
678                                   component_offset, deref->type);
679          break;
680 
681       case nir_intrinsic_store_deref:
682          lower_store(intrin, state, vertex_index, var, offset,
683                      component_offset, deref->type);
684          break;
685 
686       case nir_intrinsic_interp_deref_at_centroid:
687       case nir_intrinsic_interp_deref_at_sample:
688       case nir_intrinsic_interp_deref_at_offset:
689       case nir_intrinsic_interp_deref_at_vertex:
690          assert(vertex_index == NULL);
691          replacement = lower_interpolate_at(intrin, state, var, offset,
692                                             component_offset, deref->type);
693          break;
694 
695       default:
696          continue;
697       }
698 
699       if (replacement) {
700          nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
701                                   nir_src_for_ssa(replacement));
702       }
703       nir_instr_remove(&intrin->instr);
704       progress = true;
705    }
706 
707    return progress;
708 }
709 
710 static bool
nir_lower_io_impl(nir_function_impl * impl,nir_variable_mode modes,int (* type_size)(const struct glsl_type *,bool),nir_lower_io_options options)711 nir_lower_io_impl(nir_function_impl *impl,
712                   nir_variable_mode modes,
713                   int (*type_size)(const struct glsl_type *, bool),
714                   nir_lower_io_options options)
715 {
716    struct lower_io_state state;
717    bool progress = false;
718 
719    nir_builder_init(&state.builder, impl);
720    state.dead_ctx = ralloc_context(NULL);
721    state.modes = modes;
722    state.type_size = type_size;
723    state.options = options;
724 
725    ASSERTED nir_variable_mode supported_modes =
726       nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
727    assert(!(modes & ~supported_modes));
728 
729    nir_foreach_block(block, impl) {
730       progress |= nir_lower_io_block(block, &state);
731    }
732 
733    ralloc_free(state.dead_ctx);
734 
735    nir_metadata_preserve(impl, nir_metadata_none);
736 
737    return progress;
738 }
739 
740 /** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
741  *
742  * This pass is intended to be used for cross-stage shader I/O and driver-
743  * managed uniforms to turn deref-based access into a simpler model using
744  * locations or offsets.  For fragment shader inputs, it can optionally turn
745  * load_deref into an explicit interpolation using barycentrics coming from
746  * one of the load_barycentric_* intrinsics.  This pass requires that all
747  * deref chains are complete and contain no casts.
748  */
749 bool
nir_lower_io(nir_shader * shader,nir_variable_mode modes,int (* type_size)(const struct glsl_type *,bool),nir_lower_io_options options)750 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
751              int (*type_size)(const struct glsl_type *, bool),
752              nir_lower_io_options options)
753 {
754    bool progress = false;
755 
756    nir_foreach_function(function, shader) {
757       if (function->impl) {
758          progress |= nir_lower_io_impl(function->impl, modes,
759                                        type_size, options);
760       }
761    }
762 
763    return progress;
764 }
765 
/* Size in bytes of one scalar component of a vector, scalar or matrix type.
 * Booleans count as 4 bytes; every other type uses its bit size divided
 * by 8.
 */
static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));

   if (glsl_type_is_boolean(type))
      return 4;

   return glsl_get_bit_size(type) / 8;
}
773 
774 static nir_ssa_def *
build_addr_iadd(nir_builder * b,nir_ssa_def * addr,nir_address_format addr_format,nir_variable_mode modes,nir_ssa_def * offset)775 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
776                 nir_address_format addr_format,
777                 nir_variable_mode modes,
778                 nir_ssa_def *offset)
779 {
780    assert(offset->num_components == 1);
781 
782    switch (addr_format) {
783    case nir_address_format_32bit_global:
784    case nir_address_format_64bit_global:
785    case nir_address_format_32bit_offset:
786       assert(addr->bit_size == offset->bit_size);
787       assert(addr->num_components == 1);
788       return nir_iadd(b, addr, offset);
789 
790    case nir_address_format_32bit_offset_as_64bit:
791       assert(addr->num_components == 1);
792       assert(offset->bit_size == 32);
793       return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
794 
795    case nir_address_format_64bit_bounded_global:
796       assert(addr->num_components == 4);
797       assert(addr->bit_size == offset->bit_size);
798       return nir_vec4(b, nir_channel(b, addr, 0),
799                          nir_channel(b, addr, 1),
800                          nir_channel(b, addr, 2),
801                          nir_iadd(b, nir_channel(b, addr, 3), offset));
802 
803    case nir_address_format_32bit_index_offset:
804       assert(addr->num_components == 2);
805       assert(addr->bit_size == offset->bit_size);
806       return nir_vec2(b, nir_channel(b, addr, 0),
807                          nir_iadd(b, nir_channel(b, addr, 1), offset));
808 
809    case nir_address_format_32bit_index_offset_pack64:
810       assert(addr->num_components == 1);
811       assert(offset->bit_size == 32);
812       return nir_pack_64_2x32_split(b,
813                                     nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
814                                     nir_unpack_64_2x32_split_y(b, addr));
815 
816    case nir_address_format_vec2_index_32bit_offset:
817       assert(addr->num_components == 3);
818       assert(offset->bit_size == 32);
819       return nir_vec3(b, nir_channel(b, addr, 0), nir_channel(b, addr, 1),
820                          nir_iadd(b, nir_channel(b, addr, 2), offset));
821 
822    case nir_address_format_62bit_generic:
823       assert(addr->num_components == 1);
824       assert(addr->bit_size == 64);
825       assert(offset->bit_size == 64);
826       if (!(modes & ~(nir_var_function_temp |
827                       nir_var_shader_temp |
828                       nir_var_mem_shared))) {
829          /* If we're sure it's one of these modes, we can do an easy 32-bit
830           * addition and don't need to bother with 64-bit math.
831           */
832          nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
833          nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
834          addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
835          return nir_pack_64_2x32_split(b, addr32, type);
836       } else {
837          return nir_iadd(b, addr, offset);
838       }
839 
840    case nir_address_format_logical:
841       unreachable("Unsupported address format");
842    }
843    unreachable("Invalid address format");
844 }
845 
846 static unsigned
addr_get_offset_bit_size(nir_ssa_def * addr,nir_address_format addr_format)847 addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
848 {
849    if (addr_format == nir_address_format_32bit_offset_as_64bit ||
850        addr_format == nir_address_format_32bit_index_offset_pack64)
851       return 32;
852    return addr->bit_size;
853 }
854 
855 static nir_ssa_def *
build_addr_iadd_imm(nir_builder * b,nir_ssa_def * addr,nir_address_format addr_format,nir_variable_mode modes,int64_t offset)856 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
857                     nir_address_format addr_format,
858                     nir_variable_mode modes,
859                     int64_t offset)
860 {
861    return build_addr_iadd(b, addr, addr_format, modes,
862                              nir_imm_intN_t(b, offset,
863                                             addr_get_offset_bit_size(addr, addr_format)));
864 }
865 
866 static nir_ssa_def *
build_addr_for_var(nir_builder * b,nir_variable * var,nir_address_format addr_format)867 build_addr_for_var(nir_builder *b, nir_variable *var,
868                    nir_address_format addr_format)
869 {
870    assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
871                             nir_var_shader_temp | nir_var_function_temp |
872                             nir_var_mem_push_const | nir_var_mem_constant));
873 
874    const unsigned num_comps = nir_address_format_num_components(addr_format);
875    const unsigned bit_size = nir_address_format_bit_size(addr_format);
876 
877    switch (addr_format) {
878    case nir_address_format_32bit_global:
879    case nir_address_format_64bit_global: {
880       nir_ssa_def *base_addr;
881       switch (var->data.mode) {
882       case nir_var_shader_temp:
883          base_addr = nir_load_scratch_base_ptr(b, 0, num_comps, bit_size);
884          break;
885 
886       case nir_var_function_temp:
887          base_addr = nir_load_scratch_base_ptr(b, 1, num_comps, bit_size);
888          break;
889 
890       case nir_var_mem_constant:
891          base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
892          break;
893 
894       case nir_var_mem_shared:
895          base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
896          break;
897 
898       default:
899          unreachable("Unsupported variable mode");
900       }
901 
902       return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
903                                     var->data.driver_location);
904    }
905 
906    case nir_address_format_32bit_offset:
907       assert(var->data.driver_location <= UINT32_MAX);
908       return nir_imm_int(b, var->data.driver_location);
909 
910    case nir_address_format_32bit_offset_as_64bit:
911       assert(var->data.driver_location <= UINT32_MAX);
912       return nir_imm_int64(b, var->data.driver_location);
913 
914    case nir_address_format_62bit_generic:
915       switch (var->data.mode) {
916       case nir_var_shader_temp:
917       case nir_var_function_temp:
918          assert(var->data.driver_location <= UINT32_MAX);
919          return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
920 
921       case nir_var_mem_shared:
922          assert(var->data.driver_location <= UINT32_MAX);
923          return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
924 
925       default:
926          unreachable("Unsupported variable mode");
927       }
928 
929    default:
930       unreachable("Unsupported address format");
931    }
932 }
933 
934 static nir_ssa_def *
build_runtime_addr_mode_check(nir_builder * b,nir_ssa_def * addr,nir_address_format addr_format,nir_variable_mode mode)935 build_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
936                               nir_address_format addr_format,
937                               nir_variable_mode mode)
938 {
939    /* The compile-time check failed; do a run-time check */
940    switch (addr_format) {
941    case nir_address_format_62bit_generic: {
942       assert(addr->num_components == 1);
943       assert(addr->bit_size == 64);
944       nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
945       switch (mode) {
946       case nir_var_function_temp:
947       case nir_var_shader_temp:
948          return nir_ieq_imm(b, mode_enum, 0x2);
949 
950       case nir_var_mem_shared:
951          return nir_ieq_imm(b, mode_enum, 0x1);
952 
953       case nir_var_mem_global:
954          return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
955                            nir_ieq_imm(b, mode_enum, 0x3));
956 
957       default:
958          unreachable("Invalid mode check intrinsic");
959       }
960    }
961 
962    default:
963       unreachable("Unsupported address mode");
964    }
965 }
966 
967 static nir_ssa_def *
addr_to_index(nir_builder * b,nir_ssa_def * addr,nir_address_format addr_format)968 addr_to_index(nir_builder *b, nir_ssa_def *addr,
969               nir_address_format addr_format)
970 {
971    switch (addr_format) {
972    case nir_address_format_32bit_index_offset:
973       assert(addr->num_components == 2);
974       return nir_channel(b, addr, 0);
975    case nir_address_format_32bit_index_offset_pack64:
976       return nir_unpack_64_2x32_split_y(b, addr);
977    case nir_address_format_vec2_index_32bit_offset:
978       assert(addr->num_components == 3);
979       return nir_channels(b, addr, 0x3);
980    default: unreachable("Invalid address format");
981    }
982 }
983 
984 static nir_ssa_def *
addr_to_offset(nir_builder * b,nir_ssa_def * addr,nir_address_format addr_format)985 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
986                nir_address_format addr_format)
987 {
988    switch (addr_format) {
989    case nir_address_format_32bit_index_offset:
990       assert(addr->num_components == 2);
991       return nir_channel(b, addr, 1);
992    case nir_address_format_32bit_index_offset_pack64:
993       return nir_unpack_64_2x32_split_x(b, addr);
994    case nir_address_format_vec2_index_32bit_offset:
995       assert(addr->num_components == 3);
996       return nir_channel(b, addr, 2);
997    case nir_address_format_32bit_offset:
998       return addr;
999    case nir_address_format_32bit_offset_as_64bit:
1000    case nir_address_format_62bit_generic:
1001       return nir_u2u32(b, addr);
1002    default:
1003       unreachable("Invalid address format");
1004    }
1005 }
1006 
1007 /** Returns true if the given address format resolves to a global address */
1008 static bool
addr_format_is_global(nir_address_format addr_format,nir_variable_mode mode)1009 addr_format_is_global(nir_address_format addr_format,
1010                       nir_variable_mode mode)
1011 {
1012    if (addr_format == nir_address_format_62bit_generic)
1013       return mode == nir_var_mem_global;
1014 
1015    return addr_format == nir_address_format_32bit_global ||
1016           addr_format == nir_address_format_64bit_global ||
1017           addr_format == nir_address_format_64bit_bounded_global;
1018 }
1019 
1020 static bool
addr_format_is_offset(nir_address_format addr_format,nir_variable_mode mode)1021 addr_format_is_offset(nir_address_format addr_format,
1022                       nir_variable_mode mode)
1023 {
1024    if (addr_format == nir_address_format_62bit_generic)
1025       return mode != nir_var_mem_global;
1026 
1027    return addr_format == nir_address_format_32bit_offset ||
1028           addr_format == nir_address_format_32bit_offset_as_64bit;
1029 }
1030 
1031 static nir_ssa_def *
addr_to_global(nir_builder * b,nir_ssa_def * addr,nir_address_format addr_format)1032 addr_to_global(nir_builder *b, nir_ssa_def *addr,
1033                nir_address_format addr_format)
1034 {
1035    switch (addr_format) {
1036    case nir_address_format_32bit_global:
1037    case nir_address_format_64bit_global:
1038    case nir_address_format_62bit_generic:
1039       assert(addr->num_components == 1);
1040       return addr;
1041 
1042    case nir_address_format_64bit_bounded_global:
1043       assert(addr->num_components == 4);
1044       return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
1045                          nir_u2u64(b, nir_channel(b, addr, 3)));
1046 
1047    case nir_address_format_32bit_index_offset:
1048    case nir_address_format_32bit_index_offset_pack64:
1049    case nir_address_format_vec2_index_32bit_offset:
1050    case nir_address_format_32bit_offset:
1051    case nir_address_format_32bit_offset_as_64bit:
1052    case nir_address_format_logical:
1053       unreachable("Cannot get a 64-bit address with this address format");
1054    }
1055 
1056    unreachable("Invalid address format");
1057 }
1058 
1059 static bool
addr_format_needs_bounds_check(nir_address_format addr_format)1060 addr_format_needs_bounds_check(nir_address_format addr_format)
1061 {
1062    return addr_format == nir_address_format_64bit_bounded_global;
1063 }
1064 
1065 static nir_ssa_def *
addr_is_in_bounds(nir_builder * b,nir_ssa_def * addr,nir_address_format addr_format,unsigned size)1066 addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
1067                   nir_address_format addr_format, unsigned size)
1068 {
1069    assert(addr_format == nir_address_format_64bit_bounded_global);
1070    assert(addr->num_components == 4);
1071    return nir_ige(b, nir_channel(b, addr, 2),
1072                      nir_iadd_imm(b, nir_channel(b, addr, 3), size));
1073 }
1074 
1075 static void
nir_get_explicit_deref_range(nir_deref_instr * deref,nir_address_format addr_format,uint32_t * out_base,uint32_t * out_range)1076 nir_get_explicit_deref_range(nir_deref_instr *deref,
1077                              nir_address_format addr_format,
1078                              uint32_t *out_base,
1079                              uint32_t *out_range)
1080 {
1081    uint32_t base = 0;
1082    uint32_t range = glsl_get_explicit_size(deref->type, false);
1083 
1084    while (true) {
1085       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1086 
1087       switch (deref->deref_type) {
1088       case nir_deref_type_array:
1089       case nir_deref_type_array_wildcard:
1090       case nir_deref_type_ptr_as_array: {
1091          const unsigned stride = nir_deref_instr_array_stride(deref);
1092          if (stride == 0)
1093             goto fail;
1094 
1095          if (!parent)
1096             goto fail;
1097 
1098          if (deref->deref_type != nir_deref_type_array_wildcard &&
1099              nir_src_is_const(deref->arr.index)) {
1100             base += stride * nir_src_as_uint(deref->arr.index);
1101          } else {
1102             if (glsl_get_length(parent->type) == 0)
1103                goto fail;
1104             range += stride * (glsl_get_length(parent->type) - 1);
1105          }
1106          break;
1107       }
1108 
1109       case nir_deref_type_struct: {
1110          if (!parent)
1111             goto fail;
1112 
1113          base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1114          break;
1115       }
1116 
1117       case nir_deref_type_cast: {
1118          nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1119 
1120          switch (parent_instr->type) {
1121          case nir_instr_type_load_const: {
1122             nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1123 
1124             switch (addr_format) {
1125             case nir_address_format_32bit_offset:
1126                base += load->value[1].u32;
1127                break;
1128             case nir_address_format_32bit_index_offset:
1129                base += load->value[1].u32;
1130                break;
1131             case nir_address_format_vec2_index_32bit_offset:
1132                base += load->value[2].u32;
1133                break;
1134             default:
1135                goto fail;
1136             }
1137 
1138             *out_base = base;
1139             *out_range = range;
1140             return;
1141          }
1142 
1143          case nir_instr_type_intrinsic: {
1144             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1145             switch (intr->intrinsic) {
1146             case nir_intrinsic_load_vulkan_descriptor:
1147                /* Assume that a load_vulkan_descriptor won't contribute to an
1148                 * offset within the resource.
1149                 */
1150                break;
1151             default:
1152                goto fail;
1153             }
1154 
1155             *out_base = base;
1156             *out_range = range;
1157             return;
1158          }
1159 
1160          default:
1161             goto fail;
1162          }
1163       }
1164 
1165       default:
1166          goto fail;
1167       }
1168 
1169       deref = parent;
1170    }
1171 
1172 fail:
1173    *out_base = 0;
1174    *out_range = ~0;
1175 }
1176 
1177 static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)1178 canonicalize_generic_modes(nir_variable_mode modes)
1179 {
1180    assert(modes != 0);
1181    if (util_bitcount(modes) == 1)
1182       return modes;
1183 
1184    assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1185                       nir_var_mem_shared | nir_var_mem_global)));
1186 
1187    /* Canonicalize by converting shader_temp to function_temp */
1188    if (modes & nir_var_shader_temp) {
1189       modes &= ~nir_var_shader_temp;
1190       modes |= nir_var_function_temp;
1191    }
1192 
1193    return modes;
1194 }
1195 
1196 static nir_ssa_def *
build_explicit_io_load(nir_builder * b,nir_intrinsic_instr * intrin,nir_ssa_def * addr,nir_address_format addr_format,nir_variable_mode modes,uint32_t align_mul,uint32_t align_offset,unsigned num_components)1197 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1198                        nir_ssa_def *addr, nir_address_format addr_format,
1199                        nir_variable_mode modes,
1200                        uint32_t align_mul, uint32_t align_offset,
1201                        unsigned num_components)
1202 {
1203    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1204    modes = canonicalize_generic_modes(modes);
1205 
1206    if (util_bitcount(modes) > 1) {
1207       if (addr_format_is_global(addr_format, modes)) {
1208          return build_explicit_io_load(b, intrin, addr, addr_format,
1209                                        nir_var_mem_global,
1210                                        align_mul, align_offset,
1211                                        num_components);
1212       } else if (modes & nir_var_function_temp) {
1213          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1214                                                       nir_var_function_temp));
1215          nir_ssa_def *res1 =
1216             build_explicit_io_load(b, intrin, addr, addr_format,
1217                                    nir_var_function_temp,
1218                                    align_mul, align_offset,
1219                                    num_components);
1220          nir_push_else(b, NULL);
1221          nir_ssa_def *res2 =
1222             build_explicit_io_load(b, intrin, addr, addr_format,
1223                                    modes & ~nir_var_function_temp,
1224                                    align_mul, align_offset,
1225                                    num_components);
1226          nir_pop_if(b, NULL);
1227          return nir_if_phi(b, res1, res2);
1228       } else {
1229          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1230                                                       nir_var_mem_shared));
1231          assert(modes & nir_var_mem_shared);
1232          nir_ssa_def *res1 =
1233             build_explicit_io_load(b, intrin, addr, addr_format,
1234                                    nir_var_mem_shared,
1235                                    align_mul, align_offset,
1236                                    num_components);
1237          nir_push_else(b, NULL);
1238          assert(modes & nir_var_mem_global);
1239          nir_ssa_def *res2 =
1240             build_explicit_io_load(b, intrin, addr, addr_format,
1241                                    nir_var_mem_global,
1242                                    align_mul, align_offset,
1243                                    num_components);
1244          nir_pop_if(b, NULL);
1245          return nir_if_phi(b, res1, res2);
1246       }
1247    }
1248 
1249    assert(util_bitcount(modes) == 1);
1250    const nir_variable_mode mode = modes;
1251 
1252    nir_intrinsic_op op;
1253    switch (intrin->intrinsic) {
1254    case nir_intrinsic_load_deref:
1255       switch (mode) {
1256       case nir_var_mem_ubo:
1257          op = nir_intrinsic_load_ubo;
1258          break;
1259       case nir_var_mem_ssbo:
1260          if (addr_format_is_global(addr_format, mode))
1261             op = nir_intrinsic_load_global;
1262          else
1263             op = nir_intrinsic_load_ssbo;
1264          break;
1265       case nir_var_mem_global:
1266          assert(addr_format_is_global(addr_format, mode));
1267          op = nir_intrinsic_load_global;
1268          break;
1269       case nir_var_uniform:
1270          assert(addr_format_is_offset(addr_format, mode));
1271          assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1272          op = nir_intrinsic_load_kernel_input;
1273          break;
1274       case nir_var_mem_shared:
1275          assert(addr_format_is_offset(addr_format, mode));
1276          op = nir_intrinsic_load_shared;
1277          break;
1278       case nir_var_shader_temp:
1279       case nir_var_function_temp:
1280          if (addr_format_is_offset(addr_format, mode)) {
1281             op = nir_intrinsic_load_scratch;
1282          } else {
1283             assert(addr_format_is_global(addr_format, mode));
1284             op = nir_intrinsic_load_global;
1285          }
1286          break;
1287       case nir_var_mem_push_const:
1288          assert(addr_format == nir_address_format_32bit_offset);
1289          op = nir_intrinsic_load_push_constant;
1290          break;
1291       case nir_var_mem_constant:
1292          if (addr_format_is_offset(addr_format, mode)) {
1293             op = nir_intrinsic_load_constant;
1294          } else {
1295             assert(addr_format_is_global(addr_format, mode));
1296             op = nir_intrinsic_load_global_constant;
1297          }
1298          break;
1299       default:
1300          unreachable("Unsupported explicit IO variable mode");
1301       }
1302       break;
1303 
1304    case nir_intrinsic_load_deref_block_intel:
1305       switch (mode) {
1306       case nir_var_mem_ssbo:
1307          if (addr_format_is_global(addr_format, mode))
1308             op = nir_intrinsic_load_global_block_intel;
1309          else
1310             op = nir_intrinsic_load_ssbo_block_intel;
1311          break;
1312       case nir_var_mem_global:
1313          op = nir_intrinsic_load_global_block_intel;
1314          break;
1315       case nir_var_mem_shared:
1316          op = nir_intrinsic_load_shared_block_intel;
1317          break;
1318       default:
1319          unreachable("Unsupported explicit IO variable mode");
1320       }
1321       break;
1322 
1323    default:
1324       unreachable("Invalid intrinsic");
1325    }
1326 
1327    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1328 
1329    if (addr_format_is_global(addr_format, mode)) {
1330       load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1331    } else if (addr_format_is_offset(addr_format, mode)) {
1332       assert(addr->num_components == 1);
1333       load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1334    } else {
1335       load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1336       load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1337    }
1338 
1339    if (nir_intrinsic_has_access(load))
1340       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1341 
1342    if (op == nir_intrinsic_load_constant) {
1343       nir_intrinsic_set_base(load, 0);
1344       nir_intrinsic_set_range(load, b->shader->constant_data_size);
1345    } else if (mode == nir_var_mem_push_const) {
1346       /* Push constants are required to be able to be chased back to the
1347        * variable so we can provide a base/range.
1348        */
1349       nir_variable *var = nir_deref_instr_get_variable(deref);
1350       nir_intrinsic_set_base(load, 0);
1351       nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1352    }
1353 
1354    unsigned bit_size = intrin->dest.ssa.bit_size;
1355    if (bit_size == 1) {
1356       /* TODO: Make the native bool bit_size an option. */
1357       bit_size = 32;
1358    }
1359 
1360    if (nir_intrinsic_has_align(load))
1361       nir_intrinsic_set_align(load, align_mul, align_offset);
1362 
1363    if (nir_intrinsic_has_range_base(load)) {
1364       unsigned base, range;
1365       nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1366       nir_intrinsic_set_range_base(load, base);
1367       nir_intrinsic_set_range(load, range);
1368    }
1369 
1370    assert(intrin->dest.is_ssa);
1371    load->num_components = num_components;
1372    nir_ssa_dest_init(&load->instr, &load->dest, num_components,
1373                      bit_size, intrin->dest.ssa.name);
1374 
1375    assert(bit_size % 8 == 0);
1376 
1377    nir_ssa_def *result;
1378    if (addr_format_needs_bounds_check(addr_format)) {
1379       /* The Vulkan spec for robustBufferAccess gives us quite a few options
1380        * as to what we can do with an OOB read.  Unfortunately, returning
1381        * undefined values isn't one of them so we return an actual zero.
1382        */
1383       nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1384 
1385       /* TODO: Better handle block_intel. */
1386       const unsigned load_size = (bit_size / 8) * load->num_components;
1387       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1388 
1389       nir_builder_instr_insert(b, &load->instr);
1390 
1391       nir_pop_if(b, NULL);
1392 
1393       result = nir_if_phi(b, &load->dest.ssa, zero);
1394    } else {
1395       nir_builder_instr_insert(b, &load->instr);
1396       result = &load->dest.ssa;
1397    }
1398 
1399    if (intrin->dest.ssa.bit_size == 1) {
1400       /* For shared, we can go ahead and use NIR's and/or the back-end's
1401        * standard encoding for booleans rather than forcing a 0/1 boolean.
1402        * This should save an instruction or two.
1403        */
1404       if (mode == nir_var_mem_shared ||
1405           mode == nir_var_shader_temp ||
1406           mode == nir_var_function_temp)
1407          result = nir_b2b1(b, result);
1408       else
1409          result = nir_i2b(b, result);
1410    }
1411 
1412    return result;
1413 }
1414 
1415 static void
build_explicit_io_store(nir_builder * b,nir_intrinsic_instr * intrin,nir_ssa_def * addr,nir_address_format addr_format,nir_variable_mode modes,uint32_t align_mul,uint32_t align_offset,nir_ssa_def * value,nir_component_mask_t write_mask)1416 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1417                         nir_ssa_def *addr, nir_address_format addr_format,
1418                         nir_variable_mode modes,
1419                         uint32_t align_mul, uint32_t align_offset,
1420                         nir_ssa_def *value, nir_component_mask_t write_mask)
1421 {
1422    modes = canonicalize_generic_modes(modes);
1423 
1424    if (util_bitcount(modes) > 1) {
1425       if (addr_format_is_global(addr_format, modes)) {
1426          build_explicit_io_store(b, intrin, addr, addr_format,
1427                                  nir_var_mem_global,
1428                                  align_mul, align_offset,
1429                                  value, write_mask);
1430       } else if (modes & nir_var_function_temp) {
1431          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1432                                                       nir_var_function_temp));
1433          build_explicit_io_store(b, intrin, addr, addr_format,
1434                                  nir_var_function_temp,
1435                                  align_mul, align_offset,
1436                                  value, write_mask);
1437          nir_push_else(b, NULL);
1438          build_explicit_io_store(b, intrin, addr, addr_format,
1439                                  modes & ~nir_var_function_temp,
1440                                  align_mul, align_offset,
1441                                  value, write_mask);
1442          nir_pop_if(b, NULL);
1443       } else {
1444          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1445                                                       nir_var_mem_shared));
1446          assert(modes & nir_var_mem_shared);
1447          build_explicit_io_store(b, intrin, addr, addr_format,
1448                                  nir_var_mem_shared,
1449                                  align_mul, align_offset,
1450                                  value, write_mask);
1451          nir_push_else(b, NULL);
1452          assert(modes & nir_var_mem_global);
1453          build_explicit_io_store(b, intrin, addr, addr_format,
1454                                  nir_var_mem_global,
1455                                  align_mul, align_offset,
1456                                  value, write_mask);
1457          nir_pop_if(b, NULL);
1458       }
1459       return;
1460    }
1461 
1462    assert(util_bitcount(modes) == 1);
1463    const nir_variable_mode mode = modes;
1464 
1465    nir_intrinsic_op op;
1466    switch (intrin->intrinsic) {
1467    case nir_intrinsic_store_deref:
1468       assert(write_mask != 0);
1469 
1470       switch (mode) {
1471       case nir_var_mem_ssbo:
1472          if (addr_format_is_global(addr_format, mode))
1473             op = nir_intrinsic_store_global;
1474          else
1475             op = nir_intrinsic_store_ssbo;
1476          break;
1477       case nir_var_mem_global:
1478          assert(addr_format_is_global(addr_format, mode));
1479          op = nir_intrinsic_store_global;
1480          break;
1481       case nir_var_mem_shared:
1482          assert(addr_format_is_offset(addr_format, mode));
1483          op = nir_intrinsic_store_shared;
1484          break;
1485       case nir_var_shader_temp:
1486       case nir_var_function_temp:
1487          if (addr_format_is_offset(addr_format, mode)) {
1488             op = nir_intrinsic_store_scratch;
1489          } else {
1490             assert(addr_format_is_global(addr_format, mode));
1491             op = nir_intrinsic_store_global;
1492          }
1493          break;
1494       default:
1495          unreachable("Unsupported explicit IO variable mode");
1496       }
1497       break;
1498 
1499    case nir_intrinsic_store_deref_block_intel:
1500       assert(write_mask == 0);
1501 
1502       switch (mode) {
1503       case nir_var_mem_ssbo:
1504          if (addr_format_is_global(addr_format, mode))
1505             op = nir_intrinsic_store_global_block_intel;
1506          else
1507             op = nir_intrinsic_store_ssbo_block_intel;
1508          break;
1509       case nir_var_mem_global:
1510          op = nir_intrinsic_store_global_block_intel;
1511          break;
1512       case nir_var_mem_shared:
1513          op = nir_intrinsic_store_shared_block_intel;
1514          break;
1515       default:
1516          unreachable("Unsupported explicit IO variable mode");
1517       }
1518       break;
1519 
1520    default:
1521       unreachable("Invalid intrinsic");
1522    }
1523 
1524    nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1525 
1526    if (value->bit_size == 1) {
1527       /* For shared, we can go ahead and use NIR's and/or the back-end's
1528        * standard encoding for booleans rather than forcing a 0/1 boolean.
1529        * This should save an instruction or two.
1530        *
1531        * TODO: Make the native bool bit_size an option.
1532        */
1533       if (mode == nir_var_mem_shared ||
1534           mode == nir_var_shader_temp ||
1535           mode == nir_var_function_temp)
1536          value = nir_b2b32(b, value);
1537       else
1538          value = nir_b2i(b, value, 32);
1539    }
1540 
1541    store->src[0] = nir_src_for_ssa(value);
1542    if (addr_format_is_global(addr_format, mode)) {
1543       store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1544    } else if (addr_format_is_offset(addr_format, mode)) {
1545       assert(addr->num_components == 1);
1546       store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1547    } else {
1548       store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1549       store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1550    }
1551 
1552    nir_intrinsic_set_write_mask(store, write_mask);
1553 
1554    if (nir_intrinsic_has_access(store))
1555       nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1556 
1557    nir_intrinsic_set_align(store, align_mul, align_offset);
1558 
1559    assert(value->num_components == 1 ||
1560           value->num_components == intrin->num_components);
1561    store->num_components = value->num_components;
1562 
1563    assert(value->bit_size % 8 == 0);
1564 
1565    if (addr_format_needs_bounds_check(addr_format)) {
1566       /* TODO: Better handle block_intel. */
1567       const unsigned store_size = (value->bit_size / 8) * store->num_components;
1568       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1569 
1570       nir_builder_instr_insert(b, &store->instr);
1571 
1572       nir_pop_if(b, NULL);
1573    } else {
1574       nir_builder_instr_insert(b, &store->instr);
1575    }
1576 }
1577 
1578 static nir_ssa_def *
build_explicit_io_atomic(nir_builder * b,nir_intrinsic_instr * intrin,nir_ssa_def * addr,nir_address_format addr_format,nir_variable_mode modes)1579 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1580                          nir_ssa_def *addr, nir_address_format addr_format,
1581                          nir_variable_mode modes)
1582 {
1583    modes = canonicalize_generic_modes(modes);
1584 
1585    if (util_bitcount(modes) > 1) {
1586       if (addr_format_is_global(addr_format, modes)) {
1587          return build_explicit_io_atomic(b, intrin, addr, addr_format,
1588                                          nir_var_mem_global);
1589       } else if (modes & nir_var_function_temp) {
1590          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1591                                                       nir_var_function_temp));
1592          nir_ssa_def *res1 =
1593             build_explicit_io_atomic(b, intrin, addr, addr_format,
1594                                      nir_var_function_temp);
1595          nir_push_else(b, NULL);
1596          nir_ssa_def *res2 =
1597             build_explicit_io_atomic(b, intrin, addr, addr_format,
1598                                      modes & ~nir_var_function_temp);
1599          nir_pop_if(b, NULL);
1600          return nir_if_phi(b, res1, res2);
1601       } else {
1602          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1603                                                       nir_var_mem_shared));
1604          assert(modes & nir_var_mem_shared);
1605          nir_ssa_def *res1 =
1606             build_explicit_io_atomic(b, intrin, addr, addr_format,
1607                                      nir_var_mem_shared);
1608          nir_push_else(b, NULL);
1609          assert(modes & nir_var_mem_global);
1610          nir_ssa_def *res2 =
1611             build_explicit_io_atomic(b, intrin, addr, addr_format,
1612                                      nir_var_mem_global);
1613          nir_pop_if(b, NULL);
1614          return nir_if_phi(b, res1, res2);
1615       }
1616    }
1617 
1618    assert(util_bitcount(modes) == 1);
1619    const nir_variable_mode mode = modes;
1620 
1621    const unsigned num_data_srcs =
1622       nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1623 
1624    nir_intrinsic_op op;
1625    switch (mode) {
1626    case nir_var_mem_ssbo:
1627       if (addr_format_is_global(addr_format, mode))
1628          op = global_atomic_for_deref(intrin->intrinsic);
1629       else
1630          op = ssbo_atomic_for_deref(intrin->intrinsic);
1631       break;
1632    case nir_var_mem_global:
1633       assert(addr_format_is_global(addr_format, mode));
1634       op = global_atomic_for_deref(intrin->intrinsic);
1635       break;
1636    case nir_var_mem_shared:
1637       assert(addr_format_is_offset(addr_format, mode));
1638       op = shared_atomic_for_deref(intrin->intrinsic);
1639       break;
1640    default:
1641       unreachable("Unsupported explicit IO variable mode");
1642    }
1643 
1644    nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1645 
1646    unsigned src = 0;
1647    if (addr_format_is_global(addr_format, mode)) {
1648       atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1649    } else if (addr_format_is_offset(addr_format, mode)) {
1650       assert(addr->num_components == 1);
1651       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1652    } else {
1653       atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1654       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1655    }
1656    for (unsigned i = 0; i < num_data_srcs; i++) {
1657       atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1658    }
1659 
1660    /* Global atomics don't have access flags because they assume that the
1661     * address may be non-uniform.
1662     */
1663    if (nir_intrinsic_has_access(atomic))
1664       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1665 
1666    assert(intrin->dest.ssa.num_components == 1);
1667    nir_ssa_dest_init(&atomic->instr, &atomic->dest,
1668                      1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
1669 
1670    assert(atomic->dest.ssa.bit_size % 8 == 0);
1671 
1672    if (addr_format_needs_bounds_check(addr_format)) {
1673       const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
1674       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1675 
1676       nir_builder_instr_insert(b, &atomic->instr);
1677 
1678       nir_pop_if(b, NULL);
1679       return nir_if_phi(b, &atomic->dest.ssa,
1680                            nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
1681    } else {
1682       nir_builder_instr_insert(b, &atomic->instr);
1683       return &atomic->dest.ssa;
1684    }
1685 }
1686 
1687 nir_ssa_def *
nir_explicit_io_address_from_deref(nir_builder * b,nir_deref_instr * deref,nir_ssa_def * base_addr,nir_address_format addr_format)1688 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1689                                    nir_ssa_def *base_addr,
1690                                    nir_address_format addr_format)
1691 {
1692    assert(deref->dest.is_ssa);
1693    switch (deref->deref_type) {
1694    case nir_deref_type_var:
1695       return build_addr_for_var(b, deref->var, addr_format);
1696 
1697    case nir_deref_type_array: {
1698       unsigned stride = nir_deref_instr_array_stride(deref);
1699       assert(stride > 0);
1700 
1701       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1702       index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
1703       return build_addr_iadd(b, base_addr, addr_format, deref->modes,
1704                                 nir_amul_imm(b, index, stride));
1705    }
1706 
1707    case nir_deref_type_ptr_as_array: {
1708       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1709       index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
1710       unsigned stride = nir_deref_instr_array_stride(deref);
1711       return build_addr_iadd(b, base_addr, addr_format, deref->modes,
1712                                 nir_amul_imm(b, index, stride));
1713    }
1714 
1715    case nir_deref_type_array_wildcard:
1716       unreachable("Wildcards should be lowered by now");
1717       break;
1718 
1719    case nir_deref_type_struct: {
1720       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1721       int offset = glsl_get_struct_field_offset(parent->type,
1722                                                 deref->strct.index);
1723       assert(offset >= 0);
1724       return build_addr_iadd_imm(b, base_addr, addr_format,
1725                                  deref->modes, offset);
1726    }
1727 
1728    case nir_deref_type_cast:
1729       /* Nothing to do here */
1730       return base_addr;
1731    }
1732 
1733    unreachable("Invalid NIR deref type");
1734 }
1735 
1736 void
nir_lower_explicit_io_instr(nir_builder * b,nir_intrinsic_instr * intrin,nir_ssa_def * addr,nir_address_format addr_format)1737 nir_lower_explicit_io_instr(nir_builder *b,
1738                             nir_intrinsic_instr *intrin,
1739                             nir_ssa_def *addr,
1740                             nir_address_format addr_format)
1741 {
1742    b->cursor = nir_after_instr(&intrin->instr);
1743 
1744    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1745    unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1746    unsigned scalar_size = type_scalar_size_bytes(deref->type);
1747    assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
1748    assert(vec_stride == 0 || vec_stride >= scalar_size);
1749 
1750    uint32_t align_mul, align_offset;
1751    if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1752       /* If we don't have an alignment from the deref, assume scalar */
1753       align_mul = scalar_size;
1754       align_offset = 0;
1755    }
1756 
1757    switch (intrin->intrinsic) {
1758    case nir_intrinsic_load_deref: {
1759       nir_ssa_def *value;
1760       if (vec_stride > scalar_size) {
1761          nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, };
1762          for (unsigned i = 0; i < intrin->num_components; i++) {
1763             unsigned comp_offset = i * vec_stride;
1764             nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1765                                                          deref->modes,
1766                                                          comp_offset);
1767             comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1768                                               addr_format, deref->modes,
1769                                               align_mul,
1770                                               (align_offset + comp_offset) %
1771                                                  align_mul,
1772                                               1);
1773          }
1774          value = nir_vec(b, comps, intrin->num_components);
1775       } else {
1776          value = build_explicit_io_load(b, intrin, addr, addr_format,
1777                                         deref->modes, align_mul, align_offset,
1778                                         intrin->num_components);
1779       }
1780       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1781       break;
1782    }
1783 
1784    case nir_intrinsic_store_deref: {
1785       assert(intrin->src[1].is_ssa);
1786       nir_ssa_def *value = intrin->src[1].ssa;
1787       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1788       if (vec_stride > scalar_size) {
1789          for (unsigned i = 0; i < intrin->num_components; i++) {
1790             if (!(write_mask & (1 << i)))
1791                continue;
1792 
1793             unsigned comp_offset = i * vec_stride;
1794             nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1795                                                          deref->modes,
1796                                                          comp_offset);
1797             build_explicit_io_store(b, intrin, comp_addr, addr_format,
1798                                     deref->modes, align_mul,
1799                                     (align_offset + comp_offset) % align_mul,
1800                                     nir_channel(b, value, i), 1);
1801          }
1802       } else {
1803          build_explicit_io_store(b, intrin, addr, addr_format,
1804                                  deref->modes, align_mul, align_offset,
1805                                  value, write_mask);
1806       }
1807       break;
1808    }
1809 
1810    case nir_intrinsic_load_deref_block_intel: {
1811       nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
1812                                                   deref->modes,
1813                                                   align_mul, align_offset,
1814                                                   intrin->num_components);
1815       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1816       break;
1817    }
1818 
1819    case nir_intrinsic_store_deref_block_intel: {
1820       assert(intrin->src[1].is_ssa);
1821       nir_ssa_def *value = intrin->src[1].ssa;
1822       const nir_component_mask_t write_mask = 0;
1823       build_explicit_io_store(b, intrin, addr, addr_format,
1824                               deref->modes, align_mul, align_offset,
1825                               value, write_mask);
1826       break;
1827    }
1828 
1829    default: {
1830       nir_ssa_def *value =
1831          build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
1832       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1833       break;
1834    }
1835    }
1836 
1837    nir_instr_remove(&intrin->instr);
1838 }
1839 
1840 bool
nir_get_explicit_deref_align(nir_deref_instr * deref,bool default_to_type_align,uint32_t * align_mul,uint32_t * align_offset)1841 nir_get_explicit_deref_align(nir_deref_instr *deref,
1842                              bool default_to_type_align,
1843                              uint32_t *align_mul,
1844                              uint32_t *align_offset)
1845 {
1846    if (deref->deref_type == nir_deref_type_var) {
1847       /* If we see a variable, align_mul is effectively infinite because we
1848        * know the offset exactly (up to the offset of the base pointer for the
1849        * given variable mode).   We have to pick something so we choose 256B
1850        * as an arbitrary alignment which seems high enough for any reasonable
1851        * wide-load use-case.  Back-ends should clamp alignments down if 256B
1852        * is too large for some reason.
1853        */
1854       *align_mul = 256;
1855       *align_offset = deref->var->data.driver_location % 256;
1856       return true;
1857    }
1858 
1859    /* If we're a cast deref that has an alignment, use that. */
1860    if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
1861       *align_mul = deref->cast.align_mul;
1862       *align_offset = deref->cast.align_offset;
1863       return true;
1864    }
1865 
1866    /* Otherwise, we need to compute the alignment based on the parent */
1867    nir_deref_instr *parent = nir_deref_instr_parent(deref);
1868    if (parent == NULL) {
1869       assert(deref->deref_type == nir_deref_type_cast);
1870       if (default_to_type_align) {
1871          /* If we don't have a parent, assume the type's alignment, if any. */
1872          unsigned type_align = glsl_get_explicit_alignment(deref->type);
1873          if (type_align == 0)
1874             return false;
1875 
1876          *align_mul = type_align;
1877          *align_offset = 0;
1878          return true;
1879       } else {
1880          return false;
1881       }
1882    }
1883 
1884    uint32_t parent_mul, parent_offset;
1885    if (!nir_get_explicit_deref_align(parent, default_to_type_align,
1886                                      &parent_mul, &parent_offset))
1887       return false;
1888 
1889    switch (deref->deref_type) {
1890    case nir_deref_type_var:
1891       unreachable("Handled above");
1892 
1893    case nir_deref_type_array:
1894    case nir_deref_type_array_wildcard:
1895    case nir_deref_type_ptr_as_array: {
1896       const unsigned stride = nir_deref_instr_array_stride(deref);
1897       if (stride == 0)
1898          return false;
1899 
1900       if (deref->deref_type != nir_deref_type_array_wildcard &&
1901           nir_src_is_const(deref->arr.index)) {
1902          unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
1903          *align_mul = parent_mul;
1904          *align_offset = (parent_offset + offset) % parent_mul;
1905       } else {
1906          /* If this is a wildcard or an indirect deref, we have to go with the
1907           * power-of-two gcd.
1908           */
1909          *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
1910          *align_offset = parent_offset % *align_mul;
1911       }
1912       return true;
1913    }
1914 
1915    case nir_deref_type_struct: {
1916       const int offset = glsl_get_struct_field_offset(parent->type,
1917                                                       deref->strct.index);
1918       if (offset < 0)
1919          return false;
1920 
1921       *align_mul = parent_mul;
1922       *align_offset = (parent_offset + offset) % parent_mul;
1923       return true;
1924    }
1925 
1926    case nir_deref_type_cast:
1927       /* We handled the explicit alignment case above. */
1928       assert(deref->cast.align_mul == 0);
1929       *align_mul = parent_mul;
1930       *align_offset = parent_offset;
1931       return true;
1932    }
1933 
1934    unreachable("Invalid deref_instr_type");
1935 }
1936 
1937 static void
lower_explicit_io_deref(nir_builder * b,nir_deref_instr * deref,nir_address_format addr_format)1938 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
1939                         nir_address_format addr_format)
1940 {
1941    /* Just delete the deref if it's not used.  We can't use
1942     * nir_deref_instr_remove_if_unused here because it may remove more than
1943     * one deref which could break our list walking since we walk the list
1944     * backwards.
1945     */
1946    assert(list_is_empty(&deref->dest.ssa.if_uses));
1947    if (list_is_empty(&deref->dest.ssa.uses)) {
1948       nir_instr_remove(&deref->instr);
1949       return;
1950    }
1951 
1952    b->cursor = nir_after_instr(&deref->instr);
1953 
1954    nir_ssa_def *base_addr = NULL;
1955    if (deref->deref_type != nir_deref_type_var) {
1956       assert(deref->parent.is_ssa);
1957       base_addr = deref->parent.ssa;
1958    }
1959 
1960    nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
1961                                                           addr_format);
1962    assert(addr->bit_size == deref->dest.ssa.bit_size);
1963    assert(addr->num_components == deref->dest.ssa.num_components);
1964 
1965    nir_instr_remove(&deref->instr);
1966    nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
1967 }
1968 
1969 static void
lower_explicit_io_access(nir_builder * b,nir_intrinsic_instr * intrin,nir_address_format addr_format)1970 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
1971                          nir_address_format addr_format)
1972 {
1973    assert(intrin->src[0].is_ssa);
1974    nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
1975 }
1976 
1977 static void
lower_explicit_io_array_length(nir_builder * b,nir_intrinsic_instr * intrin,nir_address_format addr_format)1978 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
1979                                nir_address_format addr_format)
1980 {
1981    b->cursor = nir_after_instr(&intrin->instr);
1982 
1983    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1984 
1985    assert(glsl_type_is_array(deref->type));
1986    assert(glsl_get_length(deref->type) == 0);
1987    assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
1988    unsigned stride = glsl_get_explicit_stride(deref->type);
1989    assert(stride > 0);
1990 
1991    nir_ssa_def *addr = &deref->dest.ssa;
1992    nir_ssa_def *index = addr_to_index(b, addr, addr_format);
1993    nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
1994 
1995    nir_intrinsic_instr *bsize =
1996       nir_intrinsic_instr_create(b->shader, nir_intrinsic_get_ssbo_size);
1997    bsize->src[0] = nir_src_for_ssa(index);
1998    nir_ssa_dest_init(&bsize->instr, &bsize->dest, 1, 32, NULL);
1999    nir_builder_instr_insert(b, &bsize->instr);
2000 
2001    nir_ssa_def *arr_size =
2002       nir_idiv(b, nir_isub(b, &bsize->dest.ssa, offset),
2003                   nir_imm_int(b, stride));
2004 
2005    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(arr_size));
2006    nir_instr_remove(&intrin->instr);
2007 }
2008 
2009 static void
lower_explicit_io_mode_check(nir_builder * b,nir_intrinsic_instr * intrin,nir_address_format addr_format)2010 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2011                              nir_address_format addr_format)
2012 {
2013    if (addr_format_is_global(addr_format, 0)) {
2014       /* If the address format is always global, then the driver can use
2015        * global addresses regardless of the mode.  In that case, don't create
2016        * a check, just whack the intrinsic to addr_mode_is and delegate to the
2017        * driver lowering that.
2018        */
2019       intrin->intrinsic = nir_intrinsic_addr_mode_is;
2020       return;
2021    }
2022 
2023    assert(intrin->src[0].is_ssa);
2024    nir_ssa_def *addr = intrin->src[0].ssa;
2025 
2026    b->cursor = nir_instr_remove(&intrin->instr);
2027 
2028    nir_ssa_def *is_mode =
2029       build_runtime_addr_mode_check(b, addr, addr_format,
2030                                     nir_intrinsic_memory_modes(intrin));
2031 
2032    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(is_mode));
2033 }
2034 
2035 static bool
nir_lower_explicit_io_impl(nir_function_impl * impl,nir_variable_mode modes,nir_address_format addr_format)2036 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2037                            nir_address_format addr_format)
2038 {
2039    bool progress = false;
2040 
2041    nir_builder b;
2042    nir_builder_init(&b, impl);
2043 
2044    /* Walk in reverse order so that we can see the full deref chain when we
2045     * lower the access operations.  We lower them assuming that the derefs
2046     * will be turned into address calculations later.
2047     */
2048    nir_foreach_block_reverse(block, impl) {
2049       nir_foreach_instr_reverse_safe(instr, block) {
2050          switch (instr->type) {
2051          case nir_instr_type_deref: {
2052             nir_deref_instr *deref = nir_instr_as_deref(instr);
2053             if (nir_deref_mode_is_in_set(deref, modes)) {
2054                lower_explicit_io_deref(&b, deref, addr_format);
2055                progress = true;
2056             }
2057             break;
2058          }
2059 
2060          case nir_instr_type_intrinsic: {
2061             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2062             switch (intrin->intrinsic) {
2063             case nir_intrinsic_load_deref:
2064             case nir_intrinsic_store_deref:
2065             case nir_intrinsic_load_deref_block_intel:
2066             case nir_intrinsic_store_deref_block_intel:
2067             case nir_intrinsic_deref_atomic_add:
2068             case nir_intrinsic_deref_atomic_imin:
2069             case nir_intrinsic_deref_atomic_umin:
2070             case nir_intrinsic_deref_atomic_imax:
2071             case nir_intrinsic_deref_atomic_umax:
2072             case nir_intrinsic_deref_atomic_and:
2073             case nir_intrinsic_deref_atomic_or:
2074             case nir_intrinsic_deref_atomic_xor:
2075             case nir_intrinsic_deref_atomic_exchange:
2076             case nir_intrinsic_deref_atomic_comp_swap:
2077             case nir_intrinsic_deref_atomic_fadd:
2078             case nir_intrinsic_deref_atomic_fmin:
2079             case nir_intrinsic_deref_atomic_fmax:
2080             case nir_intrinsic_deref_atomic_fcomp_swap: {
2081                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2082                if (nir_deref_mode_is_in_set(deref, modes)) {
2083                   lower_explicit_io_access(&b, intrin, addr_format);
2084                   progress = true;
2085                }
2086                break;
2087             }
2088 
2089             case nir_intrinsic_deref_buffer_array_length: {
2090                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2091                if (nir_deref_mode_is_in_set(deref, modes)) {
2092                   lower_explicit_io_array_length(&b, intrin, addr_format);
2093                   progress = true;
2094                }
2095                break;
2096             }
2097 
2098             case nir_intrinsic_deref_mode_is: {
2099                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2100                if (nir_deref_mode_is_in_set(deref, modes)) {
2101                   lower_explicit_io_mode_check(&b, intrin, addr_format);
2102                   progress = true;
2103                }
2104                break;
2105             }
2106 
2107             default:
2108                break;
2109             }
2110             break;
2111          }
2112 
2113          default:
2114             /* Nothing to do */
2115             break;
2116          }
2117       }
2118    }
2119 
2120    if (progress) {
2121       nir_metadata_preserve(impl, nir_metadata_block_index |
2122                                   nir_metadata_dominance);
2123    }
2124 
2125    return progress;
2126 }
2127 
2128 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2129  *
2130  * This pass is intended to be used for any I/O which touches memory external
2131  * to the shader or which is directly visible to the client.  It requires that
2132  * all data types in the given modes have a explicit stride/offset decorations
2133  * to tell it exactly how to calculate the offset/address for the given load,
2134  * store, or atomic operation.  If the offset/stride information does not come
2135  * from the client explicitly (as with shared variables in GL or Vulkan),
2136  * nir_lower_vars_to_explicit_types() can be used to add them.
2137  *
2138  * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2139  * pointer chains which may contain cast derefs.  It does so by walking the
2140  * deref chain backwards and simply replacing each deref, one at a time, with
2141  * the appropriate address calculation.  The pass takes a nir_address_format
2142  * parameter which describes how the offset or address is to be represented
2143  * during calculations.  By ensuring that the address is always in a
2144  * consistent format, pointers can safely be conjured from thin air by the
2145  * driver, stored to variables, passed through phis, etc.
2146  *
2147  * The one exception to the simple algorithm described above is for handling
2148  * row-major matrices in which case we may look down one additional level of
2149  * the deref chain.
2150  *
2151  * This pass is also capable of handling OpenCL generic pointers.  If the
2152  * address mode is global, it will lowering any ambiguous (more than one mode)
2153  * access to global and passing through the deref_mode_is run-time checks as
2154  * addr_mode_is.  This assumes the driver has somehow mapped shared and
2155  * scratch memory to the global address space.  For other modes such as
2156  * 62bit_generic, there is an enum embedded in the address and we lower
2157  * ambiguous access to an if-ladder and deref_mode_is to a check against the
2158  * embedded enum.  If nir_lower_explicit_io is called on any shader that
2159  * contains generic pointers, it must either be used on all of the generic
2160  * modes or none.
2161  */
2162 bool
nir_lower_explicit_io(nir_shader * shader,nir_variable_mode modes,nir_address_format addr_format)2163 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2164                       nir_address_format addr_format)
2165 {
2166    bool progress = false;
2167 
2168    nir_foreach_function(function, shader) {
2169       if (function->impl &&
2170           nir_lower_explicit_io_impl(function->impl, modes, addr_format))
2171          progress = true;
2172    }
2173 
2174    return progress;
2175 }
2176 
2177 static bool
nir_lower_vars_to_explicit_types_impl(nir_function_impl * impl,nir_variable_mode modes,glsl_type_size_align_func type_info)2178 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2179                                       nir_variable_mode modes,
2180                                       glsl_type_size_align_func type_info)
2181 {
2182    bool progress = false;
2183 
2184    nir_foreach_block(block, impl) {
2185       nir_foreach_instr(instr, block) {
2186          if (instr->type != nir_instr_type_deref)
2187             continue;
2188 
2189          nir_deref_instr *deref = nir_instr_as_deref(instr);
2190          if (!nir_deref_mode_is_in_set(deref, modes))
2191             continue;
2192 
2193          unsigned size, alignment;
2194          const struct glsl_type *new_type =
2195             glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2196          if (new_type != deref->type) {
2197             progress = true;
2198             deref->type = new_type;
2199          }
2200          if (deref->deref_type == nir_deref_type_cast) {
2201             /* See also glsl_type::get_explicit_type_for_size_align() */
2202             unsigned new_stride = align(size, alignment);
2203             if (new_stride != deref->cast.ptr_stride) {
2204                deref->cast.ptr_stride = new_stride;
2205                progress = true;
2206             }
2207          }
2208       }
2209    }
2210 
2211    if (progress) {
2212       nir_metadata_preserve(impl, nir_metadata_block_index |
2213                                   nir_metadata_dominance |
2214                                   nir_metadata_live_ssa_defs |
2215                                   nir_metadata_loop_analysis);
2216    }
2217 
2218    return progress;
2219 }
2220 
2221 static bool
lower_vars_to_explicit(nir_shader * shader,struct exec_list * vars,nir_variable_mode mode,glsl_type_size_align_func type_info)2222 lower_vars_to_explicit(nir_shader *shader,
2223                        struct exec_list *vars, nir_variable_mode mode,
2224                        glsl_type_size_align_func type_info)
2225 {
2226    bool progress = false;
2227    unsigned offset;
2228    switch (mode) {
2229    case nir_var_uniform:
2230       assert(shader->info.stage == MESA_SHADER_KERNEL);
2231       offset = 0;
2232       break;
2233    case nir_var_function_temp:
2234    case nir_var_shader_temp:
2235       offset = shader->scratch_size;
2236       break;
2237    case nir_var_mem_shared:
2238       offset = 0;
2239       break;
2240    case nir_var_mem_constant:
2241       offset = shader->constant_data_size;
2242       break;
2243    default:
2244       unreachable("Unsupported mode");
2245    }
2246    nir_foreach_variable_in_list(var, vars) {
2247       if (var->data.mode != mode)
2248          continue;
2249 
2250       unsigned size, align;
2251       const struct glsl_type *explicit_type =
2252          glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
2253 
2254       if (explicit_type != var->type)
2255          var->type = explicit_type;
2256 
2257       assert(util_is_power_of_two_nonzero(align));
2258       var->data.driver_location = ALIGN_POT(offset, align);
2259       offset = var->data.driver_location + size;
2260       progress = true;
2261    }
2262 
2263    switch (mode) {
2264    case nir_var_uniform:
2265       assert(shader->info.stage == MESA_SHADER_KERNEL);
2266       shader->num_uniforms = offset;
2267       break;
2268    case nir_var_shader_temp:
2269    case nir_var_function_temp:
2270       shader->scratch_size = offset;
2271       break;
2272    case nir_var_mem_shared:
2273       shader->info.cs.shared_size = offset;
2274       shader->shared_size = offset;
2275       break;
2276    case nir_var_mem_constant:
2277       shader->constant_data_size = offset;
2278       break;
2279    default:
2280       unreachable("Unsupported mode");
2281    }
2282 
2283    return progress;
2284 }
2285 
2286 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2287  * generic pointers, it must either be used on all of the generic modes or
2288  * none.
2289  */
2290 bool
nir_lower_vars_to_explicit_types(nir_shader * shader,nir_variable_mode modes,glsl_type_size_align_func type_info)2291 nir_lower_vars_to_explicit_types(nir_shader *shader,
2292                                  nir_variable_mode modes,
2293                                  glsl_type_size_align_func type_info)
2294 {
2295    /* TODO: Situations which need to be handled to support more modes:
2296     * - row-major matrices
2297     * - compact shader inputs/outputs
2298     * - interface types
2299     */
2300    ASSERTED nir_variable_mode supported =
2301       nir_var_mem_shared | nir_var_mem_global |
2302       nir_var_shader_temp | nir_var_function_temp | nir_var_uniform;
2303    assert(!(modes & ~supported) && "unsupported");
2304 
2305    bool progress = false;
2306 
2307    if (modes & nir_var_uniform)
2308       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2309    if (modes & nir_var_mem_shared)
2310       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2311    if (modes & nir_var_shader_temp)
2312       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2313 
2314    nir_foreach_function(function, shader) {
2315       if (function->impl) {
2316          if (modes & nir_var_function_temp)
2317             progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
2318 
2319          progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
2320       }
2321    }
2322 
2323    return progress;
2324 }
2325 
2326 static void
write_constant(void * dst,const nir_constant * c,const struct glsl_type * type)2327 write_constant(void *dst, const nir_constant *c, const struct glsl_type *type)
2328 {
2329    if (glsl_type_is_vector_or_scalar(type)) {
2330       const unsigned num_components = glsl_get_vector_elements(type);
2331       const unsigned bit_size = glsl_get_bit_size(type);
2332       if (bit_size == 1) {
2333          /* Booleans are special-cased to be 32-bit
2334           *
2335           * TODO: Make the native bool bit_size an option.
2336           */
2337          for (unsigned i = 0; i < num_components; i++) {
2338             int32_t b32 = -(int)c->values[i].b;
2339             memcpy((char *)dst + i * 4, &b32, 4);
2340          }
2341       } else {
2342          assert(bit_size >= 8 && bit_size % 8 == 0);
2343          const unsigned byte_size = bit_size / 8;
2344          for (unsigned i = 0; i < num_components; i++) {
2345             /* Annoyingly, thanks to packed structs, we can't make any
2346              * assumptions about the alignment of dst.  To avoid any strange
2347              * issues with unaligned writes, we always use memcpy.
2348              */
2349             memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2350          }
2351       }
2352    } else if (glsl_type_is_array_or_matrix(type)) {
2353       const unsigned array_len = glsl_get_length(type);
2354       const unsigned stride = glsl_get_explicit_stride(type);
2355       assert(stride > 0);
2356       const struct glsl_type *elem_type = glsl_get_array_element(type);
2357       for (unsigned i = 0; i < array_len; i++)
2358          write_constant((char *)dst + i * stride, c->elements[i], elem_type);
2359    } else {
2360       assert(glsl_type_is_struct_or_ifc(type));
2361       const unsigned num_fields = glsl_get_length(type);
2362       for (unsigned i = 0; i < num_fields; i++) {
2363          const int field_offset = glsl_get_struct_field_offset(type, i);
2364          assert(field_offset >= 0);
2365          const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2366          write_constant((char *)dst + field_offset, c->elements[i], field_type);
2367       }
2368    }
2369 }
2370 
2371 bool
nir_lower_mem_constant_vars(nir_shader * shader,glsl_type_size_align_func type_info)2372 nir_lower_mem_constant_vars(nir_shader *shader,
2373                             glsl_type_size_align_func type_info)
2374 {
2375    bool progress = false;
2376 
2377    unsigned old_constant_data_size = shader->constant_data_size;
2378    if (lower_vars_to_explicit(shader, &shader->variables,
2379                               nir_var_mem_constant, type_info)) {
2380       assert(shader->constant_data_size > old_constant_data_size);
2381       shader->constant_data = rerzalloc_size(shader, shader->constant_data,
2382                                              old_constant_data_size,
2383                                              shader->constant_data_size);
2384 
2385       nir_foreach_variable_with_modes(var, shader, nir_var_mem_constant) {
2386          write_constant((char *)shader->constant_data +
2387                            var->data.driver_location,
2388                         var->constant_initializer, var->type);
2389       }
2390       progress = true;
2391    }
2392 
2393    nir_foreach_function(function, shader) {
2394       if (!function->impl)
2395          continue;
2396 
2397       if (nir_lower_vars_to_explicit_types_impl(function->impl,
2398                                                 nir_var_mem_constant,
2399                                                 type_info))
2400          progress = true;
2401    }
2402 
2403    return progress;
2404 }
2405 
2406 /**
2407  * Return the offset source for a load/store intrinsic.
2408  */
2409 nir_src *
nir_get_io_offset_src(nir_intrinsic_instr * instr)2410 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2411 {
2412    switch (instr->intrinsic) {
2413    case nir_intrinsic_load_input:
2414    case nir_intrinsic_load_output:
2415    case nir_intrinsic_load_shared:
2416    case nir_intrinsic_load_uniform:
2417    case nir_intrinsic_load_global:
2418    case nir_intrinsic_load_global_constant:
2419    case nir_intrinsic_load_scratch:
2420    case nir_intrinsic_load_fs_input_interp_deltas:
2421    case nir_intrinsic_shared_atomic_add:
2422    case nir_intrinsic_shared_atomic_and:
2423    case nir_intrinsic_shared_atomic_comp_swap:
2424    case nir_intrinsic_shared_atomic_exchange:
2425    case nir_intrinsic_shared_atomic_fadd:
2426    case nir_intrinsic_shared_atomic_fcomp_swap:
2427    case nir_intrinsic_shared_atomic_fmax:
2428    case nir_intrinsic_shared_atomic_fmin:
2429    case nir_intrinsic_shared_atomic_imax:
2430    case nir_intrinsic_shared_atomic_imin:
2431    case nir_intrinsic_shared_atomic_or:
2432    case nir_intrinsic_shared_atomic_umax:
2433    case nir_intrinsic_shared_atomic_umin:
2434    case nir_intrinsic_shared_atomic_xor:
2435    case nir_intrinsic_global_atomic_add:
2436    case nir_intrinsic_global_atomic_and:
2437    case nir_intrinsic_global_atomic_comp_swap:
2438    case nir_intrinsic_global_atomic_exchange:
2439    case nir_intrinsic_global_atomic_fadd:
2440    case nir_intrinsic_global_atomic_fcomp_swap:
2441    case nir_intrinsic_global_atomic_fmax:
2442    case nir_intrinsic_global_atomic_fmin:
2443    case nir_intrinsic_global_atomic_imax:
2444    case nir_intrinsic_global_atomic_imin:
2445    case nir_intrinsic_global_atomic_or:
2446    case nir_intrinsic_global_atomic_umax:
2447    case nir_intrinsic_global_atomic_umin:
2448    case nir_intrinsic_global_atomic_xor:
2449       return &instr->src[0];
2450    case nir_intrinsic_load_ubo:
2451    case nir_intrinsic_load_ssbo:
2452    case nir_intrinsic_load_input_vertex:
2453    case nir_intrinsic_load_per_vertex_input:
2454    case nir_intrinsic_load_per_vertex_output:
2455    case nir_intrinsic_load_interpolated_input:
2456    case nir_intrinsic_store_output:
2457    case nir_intrinsic_store_shared:
2458    case nir_intrinsic_store_global:
2459    case nir_intrinsic_store_scratch:
2460    case nir_intrinsic_ssbo_atomic_add:
2461    case nir_intrinsic_ssbo_atomic_imin:
2462    case nir_intrinsic_ssbo_atomic_umin:
2463    case nir_intrinsic_ssbo_atomic_imax:
2464    case nir_intrinsic_ssbo_atomic_umax:
2465    case nir_intrinsic_ssbo_atomic_and:
2466    case nir_intrinsic_ssbo_atomic_or:
2467    case nir_intrinsic_ssbo_atomic_xor:
2468    case nir_intrinsic_ssbo_atomic_exchange:
2469    case nir_intrinsic_ssbo_atomic_comp_swap:
2470    case nir_intrinsic_ssbo_atomic_fadd:
2471    case nir_intrinsic_ssbo_atomic_fmin:
2472    case nir_intrinsic_ssbo_atomic_fmax:
2473    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2474       return &instr->src[1];
2475    case nir_intrinsic_store_ssbo:
2476    case nir_intrinsic_store_per_vertex_output:
2477       return &instr->src[2];
2478    default:
2479       return NULL;
2480    }
2481 }
2482 
2483 /**
2484  * Return the vertex index source for a load/store per_vertex intrinsic.
2485  */
2486 nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr * instr)2487 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
2488 {
2489    switch (instr->intrinsic) {
2490    case nir_intrinsic_load_per_vertex_input:
2491    case nir_intrinsic_load_per_vertex_output:
2492       return &instr->src[0];
2493    case nir_intrinsic_store_per_vertex_output:
2494       return &instr->src[1];
2495    default:
2496       return NULL;
2497    }
2498 }
2499 
2500 /**
2501  * Return the numeric constant that identify a NULL pointer for each address
2502  * format.
2503  */
2504 const nir_const_value *
nir_address_format_null_value(nir_address_format addr_format)2505 nir_address_format_null_value(nir_address_format addr_format)
2506 {
2507    const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2508       [nir_address_format_32bit_global] = {{0}},
2509       [nir_address_format_64bit_global] = {{0}},
2510       [nir_address_format_64bit_bounded_global] = {{0}},
2511       [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
2512       [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
2513       [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
2514       [nir_address_format_32bit_offset] = {{.u32 = ~0}},
2515       [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
2516       [nir_address_format_62bit_generic] = {{.u64 = 0}},
2517       [nir_address_format_logical] = {{.u32 = ~0}},
2518    };
2519 
2520    assert(addr_format < ARRAY_SIZE(null_values));
2521    return null_values[addr_format];
2522 }
2523 
2524 nir_ssa_def *
nir_build_addr_ieq(nir_builder * b,nir_ssa_def * addr0,nir_ssa_def * addr1,nir_address_format addr_format)2525 nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2526                    nir_address_format addr_format)
2527 {
2528    switch (addr_format) {
2529    case nir_address_format_32bit_global:
2530    case nir_address_format_64bit_global:
2531    case nir_address_format_64bit_bounded_global:
2532    case nir_address_format_32bit_index_offset:
2533    case nir_address_format_vec2_index_32bit_offset:
2534    case nir_address_format_32bit_offset:
2535    case nir_address_format_62bit_generic:
2536       return nir_ball_iequal(b, addr0, addr1);
2537 
2538    case nir_address_format_32bit_offset_as_64bit:
2539       assert(addr0->num_components == 1 && addr1->num_components == 1);
2540       return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2541 
2542    case nir_address_format_32bit_index_offset_pack64:
2543       assert(addr0->num_components == 1 && addr1->num_components == 1);
2544       return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2545 
2546    case nir_address_format_logical:
2547       unreachable("Unsupported address format");
2548    }
2549 
2550    unreachable("Invalid address format");
2551 }
2552 
2553 nir_ssa_def *
nir_build_addr_isub(nir_builder * b,nir_ssa_def * addr0,nir_ssa_def * addr1,nir_address_format addr_format)2554 nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2555                     nir_address_format addr_format)
2556 {
2557    switch (addr_format) {
2558    case nir_address_format_32bit_global:
2559    case nir_address_format_64bit_global:
2560    case nir_address_format_32bit_offset:
2561    case nir_address_format_32bit_index_offset_pack64:
2562    case nir_address_format_62bit_generic:
2563       assert(addr0->num_components == 1);
2564       assert(addr1->num_components == 1);
2565       return nir_isub(b, addr0, addr1);
2566 
2567    case nir_address_format_32bit_offset_as_64bit:
2568       assert(addr0->num_components == 1);
2569       assert(addr1->num_components == 1);
2570       return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2571 
2572    case nir_address_format_64bit_bounded_global:
2573       return nir_isub(b, addr_to_global(b, addr0, addr_format),
2574                          addr_to_global(b, addr1, addr_format));
2575 
2576    case nir_address_format_32bit_index_offset:
2577       assert(addr0->num_components == 2);
2578       assert(addr1->num_components == 2);
2579       /* Assume the same buffer index. */
2580       return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2581 
2582    case nir_address_format_vec2_index_32bit_offset:
2583       assert(addr0->num_components == 3);
2584       assert(addr1->num_components == 3);
2585       /* Assume the same buffer index. */
2586       return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2587 
2588    case nir_address_format_logical:
2589       unreachable("Unsupported address format");
2590    }
2591 
2592    unreachable("Invalid address format");
2593 }
2594 
2595 static bool
is_input(nir_intrinsic_instr * intrin)2596 is_input(nir_intrinsic_instr *intrin)
2597 {
2598    return intrin->intrinsic == nir_intrinsic_load_input ||
2599           intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2600           intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2601           intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2602 }
2603 
2604 static bool
is_output(nir_intrinsic_instr * intrin)2605 is_output(nir_intrinsic_instr *intrin)
2606 {
2607    return intrin->intrinsic == nir_intrinsic_load_output ||
2608           intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2609           intrin->intrinsic == nir_intrinsic_store_output ||
2610           intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
2611 }
2612 
/* Returns true if the value moved by the intrinsic is 64 bits wide with at
 * least three components.  For output stores the value is source 0; for every
 * other intrinsic it is the destination.
 */
static bool
is_dual_slot(nir_intrinsic_instr *intrin)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      /* Stores have no destination; inspect the value being written. */
      if (nir_src_bit_size(intrin->src[0]) != 64)
         return false;
      return nir_src_num_components(intrin->src[0]) >= 3;

   default:
      break;
   }

   if (nir_dest_bit_size(intrin->dest) != 64)
      return false;
   return nir_dest_num_components(intrin->dest) >= 3;
}
2624 
2625 /**
2626  * This pass adds constant offsets to instr->const_index[0] for input/output
2627  * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
2628  * unchanged - since we don't know what part of a compound variable is
2629  * accessed, we allocate storage for the entire thing. For drivers that use
2630  * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2631  * the offset source will be 0, so that they don't have to add it in manually.
2632  */
2633 
2634 static bool
add_const_offset_to_base_block(nir_block * block,nir_builder * b,nir_variable_mode modes)2635 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
2636                                nir_variable_mode modes)
2637 {
2638    bool progress = false;
2639    nir_foreach_instr_safe(instr, block) {
2640       if (instr->type != nir_instr_type_intrinsic)
2641          continue;
2642 
2643       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2644 
2645       if (((modes & nir_var_shader_in) && is_input(intrin)) ||
2646           ((modes & nir_var_shader_out) && is_output(intrin))) {
2647          nir_src *offset = nir_get_io_offset_src(intrin);
2648 
2649          /* TODO: Better handling of per-view variables here */
2650          if (nir_src_is_const(*offset) &&
2651              !nir_intrinsic_io_semantics(intrin).per_view) {
2652             unsigned off = nir_src_as_uint(*offset);
2653 
2654             nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
2655 
2656             nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
2657             sem.location += off;
2658             /* non-indirect indexing should reduce num_slots */
2659             sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
2660             nir_intrinsic_set_io_semantics(intrin, sem);
2661 
2662             b->cursor = nir_before_instr(&intrin->instr);
2663             nir_instr_rewrite_src(&intrin->instr, offset,
2664                                   nir_src_for_ssa(nir_imm_int(b, 0)));
2665             progress = true;
2666          }
2667       }
2668    }
2669 
2670    return progress;
2671 }
2672 
2673 bool
nir_io_add_const_offset_to_base(nir_shader * nir,nir_variable_mode modes)2674 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
2675 {
2676    bool progress = false;
2677 
2678    nir_foreach_function(f, nir) {
2679       if (f->impl) {
2680          nir_builder b;
2681          nir_builder_init(&b, f->impl);
2682          nir_foreach_block(block, f->impl) {
2683             progress |= add_const_offset_to_base_block(block, &b, modes);
2684          }
2685       }
2686    }
2687 
2688    return progress;
2689 }
2690 
2691