1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28 
29 /* This file contains various little helpers for doing simple linking in
30  * NIR.  Eventually, we'll probably want a full-blown varying packing
31  * implementation in here.  Right now, it just deletes unused things.
32  */
33 
34 /**
35  * Returns the bits in the inputs_read, outputs_written, or
36  * system_values_read bitfield corresponding to this variable.
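 *
 * For example (illustrative): a non-patch variable occupying two slots at
 * location L contributes the mask (0x3ull << L).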
37  */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41    if (var->data.location < 0)
42       return 0;
43 
44    unsigned location = var->data.patch ?
45       var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46 
47    assert(var->data.mode == nir_var_shader_in ||
48           var->data.mode == nir_var_shader_out ||
49           var->data.mode == nir_var_system_value);
50    assert(var->data.location >= 0);
51 
52    const struct glsl_type *type = var->type;
53    if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
54       assert(glsl_type_is_array(type));
55       type = glsl_get_array_element(type);
56    }
57 
58    unsigned slots = glsl_count_attribute_slots(type, false);
59    return ((1ull << slots) - 1) << location;
60 }
61 
62 static uint8_t
63 get_num_components(nir_variable *var)
64 {
65    if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
66       return 4;
67 
68    return glsl_get_vector_elements(glsl_without_array(var->type));
69 }
70 
71 static void
72 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
73 {
74    nir_foreach_function(function, shader) {
75       if (!function->impl)
76          continue;
77 
78       nir_foreach_block(block, function->impl) {
79          nir_foreach_instr(instr, block) {
80             if (instr->type != nir_instr_type_intrinsic)
81                continue;
82 
83             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
84             if (intrin->intrinsic != nir_intrinsic_load_deref)
85                continue;
86 
87             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
88             if (!nir_deref_mode_is(deref, nir_var_shader_out))
89                continue;
90 
91             nir_variable *var = nir_deref_instr_get_variable(deref);
92             for (unsigned i = 0; i < get_num_components(var); i++) {
93                if (var->data.patch) {
94                   patches_read[var->data.location_frac + i] |=
95                      get_variable_io_mask(var, shader->info.stage);
96                } else {
97                   read[var->data.location_frac + i] |=
98                      get_variable_io_mask(var, shader->info.stage);
99                }
100             }
101          }
102       }
103    }
104 }
105 
106 /**
107  * Helper for removing unused shader I/O variables, by demoting them to global
108  * variables (which may then be dead code eliminated).
109  *
110  * Example usage is:
111  *
112  * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
113  *                                      read, patches_read) ||
114  *                                      progress;
115  *
116  * The "used" arrays should each contain 4 uint64_t bitmasks (of
117  * VARYING_BIT_* values), one per .location_frac.  Note that for vector
118  * variables, only the first channel (.location_frac) is examined when
119  * deciding if the variable is used!
120  */
121 bool
122 nir_remove_unused_io_vars(nir_shader *shader,
123                           nir_variable_mode mode,
124                           uint64_t *used_by_other_stage,
125                           uint64_t *used_by_other_stage_patches)
126 {
127    bool progress = false;
128    uint64_t *used;
129 
130    assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
131 
132    nir_foreach_variable_with_modes_safe(var, shader, mode) {
133       if (var->data.patch)
134          used = used_by_other_stage_patches;
135       else
136          used = used_by_other_stage;
137 
138       if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
139          continue;
140 
141       if (var->data.always_active_io)
142          continue;
143 
144       if (var->data.explicit_xfb_buffer)
145          continue;
146 
147       uint64_t other_stage = used[var->data.location_frac];
148 
149       if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
150          /* Not used by the other stage, make it a global variable instead */
151          var->data.location = 0;
152          var->data.mode = nir_var_shader_temp;
153 
154          progress = true;
155       }
156    }
157 
158    if (progress)
159       nir_fixup_deref_modes(shader);
160 
161    return progress;
162 }
163 
164 bool
165 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
166 {
167    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
168    assert(consumer->info.stage != MESA_SHADER_VERTEX);
169 
170    uint64_t read[4] = { 0 }, written[4] = { 0 };
171    uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
172 
173    nir_foreach_shader_out_variable(var, producer) {
174       for (unsigned i = 0; i < get_num_components(var); i++) {
175          if (var->data.patch) {
176             patches_written[var->data.location_frac + i] |=
177                get_variable_io_mask(var, producer->info.stage);
178          } else {
179             written[var->data.location_frac + i] |=
180                get_variable_io_mask(var, producer->info.stage);
181          }
182       }
183    }
184 
185    nir_foreach_shader_in_variable(var, consumer) {
186       for (unsigned i = 0; i < get_num_components(var); i++) {
187          if (var->data.patch) {
188             patches_read[var->data.location_frac + i] |=
189                get_variable_io_mask(var, consumer->info.stage);
190          } else {
191             read[var->data.location_frac + i] |=
192                get_variable_io_mask(var, consumer->info.stage);
193          }
194       }
195    }
196 
197    /* Each TCS invocation can read data written by other TCS invocations,
198     * so even if the outputs are not used by the TES we must also make
199     * sure they are not read by the TCS before demoting them to globals.
200     */
201    if (producer->info.stage == MESA_SHADER_TESS_CTRL)
202       tcs_add_output_reads(producer, read, patches_read);
203 
204    bool progress = false;
205    progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
206                                         patches_read);
207 
208    progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
209                                         patches_written) || progress;
210 
211    return progress;
212 }
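
/* A minimal usage sketch (hypothetical caller, names assumed) for a VS/FS
 * pair.  Demoted variables become nir_var_shader_temp, so lowering globals
 * to locals followed by DCE is what actually deletes the dead I/O:
 *
 *    if (nir_remove_unused_varyings(vs_nir, fs_nir)) {
 *       NIR_PASS_V(vs_nir, nir_lower_global_vars_to_local);
 *       NIR_PASS_V(vs_nir, nir_opt_dce);
 *       NIR_PASS_V(fs_nir, nir_lower_global_vars_to_local);
 *       NIR_PASS_V(fs_nir, nir_opt_dce);
 *    }
 */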
213 
214 static uint8_t
215 get_interp_type(nir_variable *var, const struct glsl_type *type,
216                 bool default_to_smooth_interp)
217 {
218    if (glsl_type_is_integer(type))
219       return INTERP_MODE_FLAT;
220    else if (var->data.interpolation != INTERP_MODE_NONE)
221       return var->data.interpolation;
222    else if (default_to_smooth_interp)
223       return INTERP_MODE_SMOOTH;
224    else
225       return INTERP_MODE_NONE;
226 }
227 
228 #define INTERPOLATE_LOC_SAMPLE 0
229 #define INTERPOLATE_LOC_CENTROID 1
230 #define INTERPOLATE_LOC_CENTER 2
231 
232 static uint8_t
233 get_interp_loc(nir_variable *var)
234 {
235    if (var->data.sample)
236       return INTERPOLATE_LOC_SAMPLE;
237    else if (var->data.centroid)
238       return INTERPOLATE_LOC_CENTROID;
239    else
240       return INTERPOLATE_LOC_CENTER;
241 }
242 
243 static bool
244 is_packing_supported_for_type(const struct glsl_type *type)
245 {
246    /* We ignore complex types such as arrays, matrices, structs and bit sizes
247     * other than 32 bits. All other vector types should have been split into
248     * scalar variables by the lower_io_to_scalar pass. The only exception
249     * should be OpenGL xfb varyings.
250     * TODO: add support for more complex types?
251     */
252    return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
253 }
254 
255 struct assigned_comps
256 {
257    uint8_t comps;
258    uint8_t interp_type;
259    uint8_t interp_loc;
260    bool is_32bit;
261 };
262 
263 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
264  * algorithms this function just assigns them their existing locations for now.
265  * TODO: allow better packing of complex types.
266  */
267 static void
268 get_unmoveable_components_masks(nir_shader *shader,
269                                 nir_variable_mode mode,
270                                 struct assigned_comps *comps,
271                                 gl_shader_stage stage,
272                                 bool default_to_smooth_interp)
273 {
274    nir_foreach_variable_with_modes_safe(var, shader, mode) {
275       assert(var->data.location >= 0);
276 
277       /* Only remap things that aren't built-ins. */
278       if (var->data.location >= VARYING_SLOT_VAR0 &&
279           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
280 
281          const struct glsl_type *type = var->type;
282          if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
283             assert(glsl_type_is_array(type));
284             type = glsl_get_array_element(type);
285          }
286 
287          /* If we can pack this varying then don't mark the components as
288           * used.
289           */
290          if (is_packing_supported_for_type(type))
291             continue;
292 
293          unsigned location = var->data.location - VARYING_SLOT_VAR0;
294 
295          unsigned elements =
296             glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
297             glsl_get_vector_elements(glsl_without_array(type)) : 4;
298 
299          bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
300          unsigned slots = glsl_count_attribute_slots(type, false);
301          unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
302          unsigned comps_slot2 = 0;
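         /* Worked example (illustrative): a dvec3 with location_frac 0 has
          * elements = 3 and dmul = 2, i.e. 6 32-bit components in total.
          * The first slot gets 4 - location_frac = 4 components (mask 0xf)
          * and the second slot gets the remaining comps_slot2 = 2 (mask 0x3).
          */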
303          for (unsigned i = 0; i < slots; i++) {
304             if (dual_slot) {
305                if (i & 1) {
306                   comps[location + i].comps |= ((1 << comps_slot2) - 1);
307                } else {
308                   unsigned num_comps = 4 - var->data.location_frac;
309                   comps_slot2 = (elements * dmul) - num_comps;
310 
311                   /* Assume ARB_enhanced_layouts packing rules for doubles */
312                   assert(var->data.location_frac == 0 ||
313                          var->data.location_frac == 2);
314                   assert(comps_slot2 <= 4);
315 
316                   comps[location + i].comps |=
317                      ((1 << num_comps) - 1) << var->data.location_frac;
318                }
319             } else {
320                comps[location + i].comps |=
321                   ((1 << (elements * dmul)) - 1) << var->data.location_frac;
322             }
323 
324             comps[location + i].interp_type =
325                get_interp_type(var, type, default_to_smooth_interp);
326             comps[location + i].interp_loc = get_interp_loc(var);
327             comps[location + i].is_32bit =
328                glsl_type_is_32bit(glsl_without_array(type));
329          }
330       }
331    }
332 }
333 
334 struct varying_loc
335 {
336    uint8_t component;
337    uint32_t location;
338 };
339 
340 static void
341 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
342                     uint64_t slots_used_mask, unsigned num_slots)
343 {
344    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
345 
346    slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
347       BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
348 }
349 
350 static void
351 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
352 {
353    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
354 
355    slots_used[var->data.patch ? 1 : 0] |=
356       BITFIELD64_BIT(var->data.location - loc_offset + offset);
357 }
358 
359 static void
360 remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
361                            struct varying_loc (*remap)[4],
362                            uint64_t *slots_used, uint64_t *out_slots_read,
363                            uint32_t *p_slots_used, uint32_t *p_out_slots_read)
364 {
365    const gl_shader_stage stage = shader->info.stage;
366    uint64_t out_slots_read_tmp[2] = {0};
367    uint64_t slots_used_tmp[2] = {0};
368 
369    /* We don't touch builtins so just copy the bitmask */
370    slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
371 
372    nir_foreach_variable_with_modes(var, shader, mode) {
373       assert(var->data.location >= 0);
374 
375       /* Only remap things that aren't built-ins */
376       if (var->data.location >= VARYING_SLOT_VAR0 &&
377           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
378 
379          const struct glsl_type *type = var->type;
380          if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
381             assert(glsl_type_is_array(type));
382             type = glsl_get_array_element(type);
383          }
384 
385          unsigned num_slots = glsl_count_attribute_slots(type, false);
386          bool used_across_stages = false;
387          bool outputs_read = false;
388 
389          unsigned location = var->data.location - VARYING_SLOT_VAR0;
390          struct varying_loc *new_loc = &remap[location][var->data.location_frac];
391 
392          unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
393          uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
394          uint64_t outs_used =
395             var->data.patch ? *p_out_slots_read : *out_slots_read;
396          uint64_t slots =
397             BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
398 
399          if (slots & used)
400             used_across_stages = true;
401 
402          if (slots & outs_used)
403             outputs_read = true;
404 
405          if (new_loc->location) {
406             var->data.location = new_loc->location;
407             var->data.location_frac = new_loc->component;
408          }
409 
410          if (var->data.always_active_io) {
411             /* We can't apply link time optimisations (specifically array
412              * splitting) to these so we need to copy the existing mask
413              * otherwise we will mess up the mask for things like partially
414              * marked arrays.
415              */
416             if (used_across_stages)
417                mark_all_used_slots(var, slots_used_tmp, used, num_slots);
418 
419             if (outputs_read) {
420                mark_all_used_slots(var, out_slots_read_tmp, outs_used,
421                                    num_slots);
422             }
423          } else {
424             for (unsigned i = 0; i < num_slots; i++) {
425                if (used_across_stages)
426                   mark_used_slot(var, slots_used_tmp, i);
427 
428                if (outputs_read)
429                   mark_used_slot(var, out_slots_read_tmp, i);
430             }
431          }
432       }
433    }
434 
435    *slots_used = slots_used_tmp[0];
436    *out_slots_read = out_slots_read_tmp[0];
437    *p_slots_used = slots_used_tmp[1];
438    *p_out_slots_read = out_slots_read_tmp[1];
439 }
440 
441 struct varying_component {
442    nir_variable *var;
443    uint8_t interp_type;
444    uint8_t interp_loc;
445    bool is_32bit;
446    bool is_patch;
447    bool is_intra_stage_only;
448    bool initialised;
449 };
450 
451 static int
452 cmp_varying_component(const void *comp1_v, const void *comp2_v)
453 {
454    struct varying_component *comp1 = (struct varying_component *) comp1_v;
455    struct varying_component *comp2 = (struct varying_component *) comp2_v;
456 
457    /* We want patches to be ordered at the end of the array */
458    if (comp1->is_patch != comp2->is_patch)
459       return comp1->is_patch ? 1 : -1;
460 
461    /* We want to try to group together TCS outputs that are only read by other
462     * TCS invocations and not consumed by the following stage.
463     */
464    if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
465       return comp1->is_intra_stage_only ? 1 : -1;
466 
467    /* We can only pack varyings with matching interpolation types so group
468     * them together.
469     */
470    if (comp1->interp_type != comp2->interp_type)
471       return comp1->interp_type - comp2->interp_type;
472 
473    /* Interpolation loc must match also. */
474    if (comp1->interp_loc != comp2->interp_loc)
475       return comp1->interp_loc - comp2->interp_loc;
476 
477    /* If everything else matches just use the original location to sort */
478    const struct nir_variable_data *const data1 = &comp1->var->data;
479    const struct nir_variable_data *const data2 = &comp2->var->data;
480    if (data1->location != data2->location)
481       return data1->location - data2->location;
482    return (int)data1->location_frac - (int)data2->location_frac;
483 }
484 
485 static void
486 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
487                               struct varying_component **varying_comp_info,
488                               unsigned *varying_comp_info_size,
489                               bool default_to_smooth_interp)
490 {
491    unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
492    unsigned num_of_comps_to_pack = 0;
493 
494    /* Count the number of varyings that can be packed and create a mapping
495     * of those varyings to the array we will pass to qsort.
496     */
497    nir_foreach_shader_out_variable(var, producer) {
498 
499       /* Only remap things that aren't builtins. */
500       if (var->data.location >= VARYING_SLOT_VAR0 &&
501           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
502 
503          /* We can't repack xfb varyings. */
504          if (var->data.always_active_io)
505             continue;
506 
507          const struct glsl_type *type = var->type;
508          if (nir_is_per_vertex_io(var, producer->info.stage) || var->data.per_view) {
509             assert(glsl_type_is_array(type));
510             type = glsl_get_array_element(type);
511          }
512 
513          if (!is_packing_supported_for_type(type))
514             continue;
515 
516          unsigned loc = var->data.location - VARYING_SLOT_VAR0;
517          store_varying_info_idx[loc][var->data.location_frac] =
518             ++num_of_comps_to_pack;
519       }
520    }
521 
522    *varying_comp_info_size = num_of_comps_to_pack;
523    *varying_comp_info = rzalloc_array(NULL, struct varying_component,
524                                       num_of_comps_to_pack);
525 
526    nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
527 
528    /* Walk over the shader and populate the varying component info array */
529    nir_foreach_block(block, impl) {
530       nir_foreach_instr(instr, block) {
531          if (instr->type != nir_instr_type_intrinsic)
532             continue;
533 
534          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
535          if (intr->intrinsic != nir_intrinsic_load_deref &&
536              intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
537              intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
538              intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
539              intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
540             continue;
541 
542          nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
543          if (!nir_deref_mode_is(deref, nir_var_shader_in))
544             continue;
545 
546          /* We only remap things that aren't builtins. */
547          nir_variable *in_var = nir_deref_instr_get_variable(deref);
548          if (in_var->data.location < VARYING_SLOT_VAR0)
549             continue;
550 
551          unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
552          if (location >= MAX_VARYINGS_INCL_PATCH)
553             continue;
554 
555          unsigned var_info_idx =
556             store_varying_info_idx[location][in_var->data.location_frac];
557          if (!var_info_idx)
558             continue;
559 
560          struct varying_component *vc_info =
561             &(*varying_comp_info)[var_info_idx-1];
562 
563          if (!vc_info->initialised) {
564             const struct glsl_type *type = in_var->type;
565             if (nir_is_per_vertex_io(in_var, consumer->info.stage) ||
566                 in_var->data.per_view) {
567                assert(glsl_type_is_array(type));
568                type = glsl_get_array_element(type);
569             }
570 
571             vc_info->var = in_var;
572             vc_info->interp_type =
573                get_interp_type(in_var, type, default_to_smooth_interp);
574             vc_info->interp_loc = get_interp_loc(in_var);
575             vc_info->is_32bit = glsl_type_is_32bit(type);
576             vc_info->is_patch = in_var->data.patch;
577             vc_info->is_intra_stage_only = false;
578             vc_info->initialised = true;
579          }
580       }
581    }
582 
583    /* Walk over the shader and populate the varying component info array
584     * for varyings which are read by other TCS invocations but are not consumed
585     * by the TES.
586     */
587    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
588       impl = nir_shader_get_entrypoint(producer);
589 
590       nir_foreach_block(block, impl) {
591          nir_foreach_instr(instr, block) {
592             if (instr->type != nir_instr_type_intrinsic)
593                continue;
594 
595             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
596             if (intr->intrinsic != nir_intrinsic_load_deref)
597                continue;
598 
599             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
600             if (!nir_deref_mode_is(deref, nir_var_shader_out))
601                continue;
602 
603             /* We only remap things that aren't builtins. */
604             nir_variable *out_var = nir_deref_instr_get_variable(deref);
605             if (out_var->data.location < VARYING_SLOT_VAR0)
606                continue;
607 
608             unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
609             if (location >= MAX_VARYINGS_INCL_PATCH)
610                continue;
611 
612             unsigned var_info_idx =
613                store_varying_info_idx[location][out_var->data.location_frac];
614             if (!var_info_idx) {
615                /* Something went wrong, the shader interfaces didn't match, so
616                 * abandon packing. This can happen for example when the
617                 * inputs are scalars but the outputs are struct members.
618                 */
619                *varying_comp_info_size = 0;
620                break;
621             }
622 
623             struct varying_component *vc_info =
624                &(*varying_comp_info)[var_info_idx-1];
625 
626             if (!vc_info->initialised) {
627                const struct glsl_type *type = out_var->type;
628                if (nir_is_per_vertex_io(out_var, producer->info.stage)) {
629                   assert(glsl_type_is_array(type));
630                   type = glsl_get_array_element(type);
631                }
632 
633                vc_info->var = out_var;
634                vc_info->interp_type =
635                   get_interp_type(out_var, type, default_to_smooth_interp);
636                vc_info->interp_loc = get_interp_loc(out_var);
637                vc_info->is_32bit = glsl_type_is_32bit(type);
638                vc_info->is_patch = out_var->data.patch;
639                vc_info->is_intra_stage_only = true;
640                vc_info->initialised = true;
641             }
642          }
643       }
644    }
645 
646    for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
647       struct varying_component *vc_info = &(*varying_comp_info)[i];
648       if (!vc_info->initialised) {
649          /* Something went wrong, the shader interfaces didn't match, so
650           * abandon packing. This can happen for example when the outputs are
651           * scalars but the inputs are struct members.
652           */
653          *varying_comp_info_size = 0;
654          break;
655       }
656    }
657 }
658 
659 static void
660 assign_remap_locations(struct varying_loc (*remap)[4],
661                        struct assigned_comps *assigned_comps,
662                        struct varying_component *info,
663                        unsigned *cursor, unsigned *comp,
664                        unsigned max_location)
665 {
666    unsigned tmp_cursor = *cursor;
667    unsigned tmp_comp = *comp;
668 
669    for (; tmp_cursor < max_location; tmp_cursor++) {
670 
671       if (assigned_comps[tmp_cursor].comps) {
672          /* We can only pack varyings with matching interpolation types,
673           * interpolation loc must match also.
674           * TODO: i965 can handle interpolation locations that don't match,
675           * but the radeonsi nir backend handles everything as vec4s and so
676           * expects this to be the same for all components. We could make this
677           * check driver specific or drop it if NIR ever becomes the only
678           * radeonsi backend.
679           */
680          if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
681              assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
682             tmp_comp = 0;
683             continue;
684          }
685 
686          /* We can only pack varyings with matching types, and the current
687           * algorithm only supports packing 32-bit.
688           */
689          if (!assigned_comps[tmp_cursor].is_32bit) {
690             tmp_comp = 0;
691             continue;
692          }
693 
694          while (tmp_comp < 4 &&
695                 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
696             tmp_comp++;
697          }
698       }
699 
700       if (tmp_comp == 4) {
701          tmp_comp = 0;
702          continue;
703       }
704 
705       unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
706 
707       /* Once we have assigned a location mark it as used */
708       assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
709       assigned_comps[tmp_cursor].interp_type = info->interp_type;
710       assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
711       assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
712 
713       /* Assign remap location */
714       remap[location][info->var->data.location_frac].component = tmp_comp++;
715       remap[location][info->var->data.location_frac].location =
716          tmp_cursor + VARYING_SLOT_VAR0;
717 
718       break;
719    }
720 
721    *cursor = tmp_cursor;
722    *comp = tmp_comp;
723 }
724 
725 /* If there are empty components in the slot compact the remaining components
726  * as close to component 0 as possible. This will make it easier to fill the
727  * empty components with components from a different slot in a following pass.
728  */
729 static void
730 compact_components(nir_shader *producer, nir_shader *consumer,
731                    struct assigned_comps *assigned_comps,
732                    bool default_to_smooth_interp)
733 {
734    struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
735    struct varying_component *varying_comp_info;
736    unsigned varying_comp_info_size;
737 
738    /* Gather varying component info */
739    gather_varying_component_info(producer, consumer, &varying_comp_info,
740                                  &varying_comp_info_size,
741                                  default_to_smooth_interp);
742 
743    /* Sort varying components. */
744    qsort(varying_comp_info, varying_comp_info_size,
745          sizeof(struct varying_component), cmp_varying_component);
746 
747    unsigned cursor = 0;
748    unsigned comp = 0;
749 
750    /* Set the remap array based on the sorted components */
751    for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
752       struct varying_component *info = &varying_comp_info[i];
753 
754       assert(info->is_patch || cursor < MAX_VARYING);
755       if (info->is_patch) {
756          /* The list should be sorted with all non-patch inputs first followed
757           * by patch inputs.  When we hit our first patch input, we need to
758           * reset the cursor to MAX_VARYING so we put them in the right slot.
759           */
760          if (cursor < MAX_VARYING) {
761             cursor = MAX_VARYING;
762             comp = 0;
763          }
764 
765          assign_remap_locations(remap, assigned_comps, info,
766                                 &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
767       } else {
768          assign_remap_locations(remap, assigned_comps, info,
769                                 &cursor, &comp, MAX_VARYING);
770 
771          /* Check if we failed to assign a remap location. This can happen if
772           * for example there are a bunch of unmovable components with
773           * mismatching interpolation types causing us to skip over locations
774           * that would have been useful for packing later components.
775           * The solution is to iterate over the locations again (this should
776           * happen very rarely in practice).
777           */
778          if (cursor == MAX_VARYING) {
779             cursor = 0;
780             comp = 0;
781             assign_remap_locations(remap, assigned_comps, info,
782                                    &cursor, &comp, MAX_VARYING);
783          }
784       }
785    }
786 
787    ralloc_free(varying_comp_info);
788 
789    uint64_t zero = 0;
790    uint32_t zero32 = 0;
791    remap_slots_and_components(consumer, nir_var_shader_in, remap,
792                               &consumer->info.inputs_read, &zero,
793                               &consumer->info.patch_inputs_read, &zero32);
794    remap_slots_and_components(producer, nir_var_shader_out, remap,
795                               &producer->info.outputs_written,
796                               &producer->info.outputs_read,
797                               &producer->info.patch_outputs_written,
798                               &producer->info.patch_outputs_read);
799 }
800 
801 /* We assume that this has been called more-or-less directly after
802  * remove_unused_varyings.  At this point, all of the varyings that we
803  * aren't going to be using have been completely removed and the
804  * inputs_read and outputs_written fields in nir_shader_info reflect
805  * this.  Therefore, the total set of valid slots is the OR of the two
806  * sets of varyings;  this accounts for varyings which one side may need
807  * to read/write even if the other doesn't.  This can happen if, for
808  * instance, an array is used indirectly from one side causing it to be
809  * unsplittable but directly from the other.
810  */
811 void
812 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
813                      bool default_to_smooth_interp)
814 {
815    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
816    assert(consumer->info.stage != MESA_SHADER_VERTEX);
817 
818    struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
819 
820    get_unmoveable_components_masks(producer, nir_var_shader_out,
821                                    assigned_comps,
822                                    producer->info.stage,
823                                    default_to_smooth_interp);
824    get_unmoveable_components_masks(consumer, nir_var_shader_in,
825                                    assigned_comps,
826                                    consumer->info.stage,
827                                    default_to_smooth_interp);
828 
829    compact_components(producer, consumer, assigned_comps,
830                       default_to_smooth_interp);
831 }
832 
833 /*
834  * Mark XFB varyings as always_active_io in the consumer so the linking opts
835  * don't touch them.
836  */
837 void
838 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
839 {
840    nir_variable *input_vars[MAX_VARYING] = { 0 };
841 
842    nir_foreach_shader_in_variable(var, consumer) {
843       if (var->data.location >= VARYING_SLOT_VAR0 &&
844           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
845 
846          unsigned location = var->data.location - VARYING_SLOT_VAR0;
847          input_vars[location] = var;
848       }
849    }
850 
851    nir_foreach_shader_out_variable(var, producer) {
852       if (var->data.location >= VARYING_SLOT_VAR0 &&
853           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
854 
855          if (!var->data.always_active_io)
856             continue;
857 
858          unsigned location = var->data.location - VARYING_SLOT_VAR0;
859          if (input_vars[location]) {
860             input_vars[location]->data.always_active_io = true;
861          }
862       }
863    }
864 }
865 
866 static bool
867 does_varying_match(nir_variable *out_var, nir_variable *in_var)
868 {
869    return in_var->data.location == out_var->data.location &&
870           in_var->data.location_frac == out_var->data.location_frac;
871 }
872 
873 static nir_variable *
874 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
875 {
876    nir_foreach_shader_in_variable(var, consumer) {
877       if (does_varying_match(out_var, var))
878          return var;
879    }
880 
881    return NULL;
882 }
883 
884 static bool
885 can_replace_varying(nir_variable *out_var)
886 {
887    /* Skip types that require more complex handling.
888     * TODO: add support for these types.
889     */
890    if (glsl_type_is_array(out_var->type) ||
891        glsl_type_is_dual_slot(out_var->type) ||
892        glsl_type_is_matrix(out_var->type) ||
893        glsl_type_is_struct_or_ifc(out_var->type))
894       return false;
895 
896    /* Limit this pass to scalars for now to keep things simple. Most varyings
897     * should have been lowered to scalars at this point anyway.
898     */
899    if (!glsl_type_is_scalar(out_var->type))
900       return false;
901 
902    if (out_var->data.location < VARYING_SLOT_VAR0 ||
903        out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
904       return false;
905 
906    return true;
907 }
908 
909 static bool
910 replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
911 {
912    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
913 
914    nir_builder b;
915    nir_builder_init(&b, impl);
916 
917    nir_variable *out_var =
918       nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
919 
920    bool progress = false;
921    nir_foreach_block(block, impl) {
922       nir_foreach_instr(instr, block) {
923          if (instr->type != nir_instr_type_intrinsic)
924             continue;
925 
926          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
927          if (intr->intrinsic != nir_intrinsic_load_deref)
928             continue;
929 
930          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
931          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
932             continue;
933 
934          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
935 
936          if (!does_varying_match(out_var, in_var))
937             continue;
938 
939          b.cursor = nir_before_instr(instr);
940 
941          nir_load_const_instr *out_const =
942             nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
943 
944          /* Add new const to replace the input */
945          nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
946                                              intr->dest.ssa.bit_size,
947                                              out_const->value);
948 
949          nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));
950 
951          progress = true;
952       }
953    }
954 
955    return progress;
956 }
957 
958 static bool
959 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
960                          nir_intrinsic_instr *dup_store_intr)
961 {
962    assert(input_var);
963 
964    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
965 
966    nir_builder b;
967    nir_builder_init(&b, impl);
968 
969    nir_variable *dup_out_var =
970       nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
971 
972    bool progress = false;
973    nir_foreach_block(block, impl) {
974       nir_foreach_instr(instr, block) {
975          if (instr->type != nir_instr_type_intrinsic)
976             continue;
977 
978          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
979          if (intr->intrinsic != nir_intrinsic_load_deref)
980             continue;
981 
982          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
983          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
984             continue;
985 
986          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
987 
988          if (!does_varying_match(dup_out_var, in_var) ||
989              in_var->data.interpolation != input_var->data.interpolation ||
990              get_interp_loc(in_var) != get_interp_loc(input_var))
991             continue;
992 
993          b.cursor = nir_before_instr(instr);
994 
995          nir_ssa_def *load = nir_load_var(&b, input_var);
996          nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
997 
998          progress = true;
999       }
1000    }
1001 
1002    return progress;
1003 }
1004 
1005 bool
1006 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1007 {
1008    /* TODO: Add support for more shader stage combinations */
1009    if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1010        (producer->info.stage != MESA_SHADER_VERTEX &&
1011         producer->info.stage != MESA_SHADER_TESS_EVAL))
1012       return false;
1013 
1014    bool progress = false;
1015 
1016    nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1017 
1018    struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1019 
1020    /* If we find a store in the last block of the producer we can be sure this
1021     * is the only possible value for this output.
1022     */
1023    nir_block *last_block = nir_impl_last_block(impl);
1024    nir_foreach_instr_reverse(instr, last_block) {
1025       if (instr->type != nir_instr_type_intrinsic)
1026          continue;
1027 
1028       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1029 
1030       if (intr->intrinsic != nir_intrinsic_store_deref)
1031          continue;
1032 
1033       nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1034       if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1035          continue;
1036 
1037       nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1038       if (!can_replace_varying(out_var))
1039          continue;
1040 
1041       if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
1042          progress |= replace_constant_input(consumer, intr);
1043       } else {
1044          struct hash_entry *entry =
1045                _mesa_hash_table_search(varying_values, intr->src[1].ssa);
1046          if (entry) {
1047             progress |= replace_duplicate_input(consumer,
1048                                                 (nir_variable *) entry->data,
1049                                                 intr);
1050          } else {
1051             nir_variable *in_var = get_matching_input_var(consumer, out_var);
1052             if (in_var) {
1053                _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
1054                                        in_var);
1055             }
1056          }
1057       }
1058    }
1059 
1060    _mesa_hash_table_destroy(varying_values, NULL);
1061 
1062    return progress;
1063 }
1064 
1065 /* TODO any better helper somewhere to sort a list? */
1066 
1067 static void
1068 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1069 {
1070    nir_foreach_variable_in_list(var, var_list) {
1071       if (var->data.location > new_var->data.location) {
1072          exec_node_insert_node_before(&var->node, &new_var->node);
1073          return;
1074       }
1075    }
1076    exec_list_push_tail(var_list, &new_var->node);
1077 }
1078 
1079 static void
1080 sort_varyings(nir_shader *shader, nir_variable_mode mode,
1081               struct exec_list *sorted_list)
1082 {
1083    exec_list_make_empty(sorted_list);
1084    nir_foreach_variable_with_modes_safe(var, shader, mode) {
1085       exec_node_remove(&var->node);
1086       insert_sorted(sorted_list, var);
1087    }
1088 }
1089 
1090 void
1091 nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1092                             unsigned *size, gl_shader_stage stage)
1093 {
1094    unsigned location = 0;
1095    unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
1096    uint64_t processed_locs[2] = {0};
1097 
1098    struct exec_list io_vars;
1099    sort_varyings(shader, mode, &io_vars);
1100 
1101    int UNUSED last_loc = 0;
1102    bool last_partial = false;
1103    nir_foreach_variable_in_list(var, &io_vars) {
1104       const struct glsl_type *type = var->type;
1105       if (nir_is_per_vertex_io(var, stage)) {
1106          assert(glsl_type_is_array(type));
1107          type = glsl_get_array_element(type);
1108       }
1109 
1110       int base;
1111       if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1112          base = VERT_ATTRIB_GENERIC0;
1113       else if (var->data.mode == nir_var_shader_out &&
1114                stage == MESA_SHADER_FRAGMENT)
1115          base = FRAG_RESULT_DATA0;
1116       else
1117          base = VARYING_SLOT_VAR0;
1118 
1119       unsigned var_size, driver_size;
1120       if (var->data.compact) {
1121          /* If we are inside a partial compact,
1122           * don't allow another compact to be in this slot
1123           * if it starts at component 0.
1124           */
1125          if (last_partial && var->data.location_frac == 0) {
1126             location++;
1127          }
1128 
1129          /* compact variables must be arrays of scalars */
1130          assert(!var->data.per_view);
1131          assert(glsl_type_is_array(type));
1132          assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1133          unsigned start = 4 * location + var->data.location_frac;
1134          unsigned end = start + glsl_get_length(type);
1135          var_size = driver_size = end / 4 - location;
1136          last_partial = end % 4 != 0;
1137       } else {
1138          /* Compact variables bypass the normal varying compacting pass,
1139           * which means they cannot be in the same vec4 slot as a normal
1140           * variable. If part of the current slot is taken up by a compact
1141           * variable, we need to go to the next one.
1142           */
1143          if (last_partial) {
1144             location++;
1145             last_partial = false;
1146          }
1147 
1148          /* per-view variables have an extra array dimension, which is ignored
1149           * when counting user-facing slots (var->data.location), but *not*
1150           * with driver slots (var->data.driver_location). That is, each user
1151           * slot maps to multiple driver slots.
1152           */
1153          driver_size = glsl_count_attribute_slots(type, false);
1154          if (var->data.per_view) {
1155             assert(glsl_type_is_array(type));
1156             var_size =
1157                glsl_count_attribute_slots(glsl_get_array_element(type), false);
1158          } else {
1159             var_size = driver_size;
1160          }
1161       }
1162 
1163       /* Builtins don't allow component packing so we only need to worry about
1164        * user defined varyings sharing the same location.
1165        */
1166       bool processed = false;
1167       if (var->data.location >= base) {
1168          unsigned glsl_location = var->data.location - base;
1169 
1170          for (unsigned i = 0; i < var_size; i++) {
1171             if (processed_locs[var->data.index] &
1172                 ((uint64_t)1 << (glsl_location + i)))
1173                processed = true;
1174             else
1175                processed_locs[var->data.index] |=
1176                   ((uint64_t)1 << (glsl_location + i));
1177          }
1178       }
1179 
1180       /* Because component packing allows varyings to share the same location
1181        * we may already have processed this location.
1182        */
1183       if (processed) {
1184          /* TODO handle overlapping per-view variables */
1185          assert(!var->data.per_view);
1186          unsigned driver_location = assigned_locations[var->data.location];
1187          var->data.driver_location = driver_location;
1188 
1189          /* An array may be packed such that it crosses multiple other arrays
1190           * or variables; we need to make sure we have allocated the elements
1191           * consecutively if the previously processed var was shorter than
1192           * the current array we are processing.
1193           *
1194           * NOTE: The code below assumes the var list is ordered in ascending
1195           * location order.
1196           */
1197          assert(last_loc <= var->data.location);
1198          last_loc = var->data.location;
1199          unsigned last_slot_location = driver_location + var_size;
1200          if (last_slot_location > location) {
1201             unsigned num_unallocated_slots = last_slot_location - location;
1202             unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1203             for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1204                assigned_locations[var->data.location + i] = location;
1205                location++;
1206             }
1207          }
1208          continue;
1209       }
1210 
1211       for (unsigned i = 0; i < var_size; i++) {
1212          assigned_locations[var->data.location + i] = location + i;
1213       }
1214 
1215       var->data.driver_location = location;
1216       location += driver_size;
1217    }
1218 
1219    if (last_partial)
1220       location++;
1221 
1222    exec_list_append(&shader->variables, &io_vars);
1223    *size = location;
1224 }
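
/* A minimal usage sketch of nir_assign_io_var_locations (hypothetical driver
 * code, names assumed):
 *
 *    unsigned num_outputs = 0;
 *    nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs,
 *                                nir->info.stage);
 *
 * Afterwards each variable's data.driver_location holds a compact slot index
 * and num_outputs is the total number of driver slots consumed.
 */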
1225 
1226 static uint64_t
1227 get_linked_variable_location(unsigned location, bool patch)
1228 {
1229    if (!patch)
1230       return location;
1231 
1232    /* Reserve locations 0...3 for special patch variables
1233     * like tess factors and bounding boxes, and the generic patch
1234     * variables will come after them.
1235     */
1236    if (location >= VARYING_SLOT_PATCH0)
1237       return location - VARYING_SLOT_PATCH0 + 4;
1238    else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
1239             location <= VARYING_SLOT_BOUNDING_BOX1)
1240       return location - VARYING_SLOT_TESS_LEVEL_OUTER;
1241    else
1242       unreachable("Unsupported variable in get_linked_variable_location.");
1243 }
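
/* For illustration, and assuming the usual gl_varying_slot ordering,
 * get_linked_variable_location() maps VARYING_SLOT_TESS_LEVEL_OUTER to 0,
 * VARYING_SLOT_TESS_LEVEL_INNER to 1, the bounding-box slots to 2 and 3, and
 * VARYING_SLOT_PATCH0 + n to 4 + n.
 */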
1244 
1245 static uint64_t
1246 get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
1247 {
1248    const struct glsl_type *type = variable->type;
1249 
1250    if (nir_is_per_vertex_io(variable, stage)) {
1251       assert(glsl_type_is_array(type));
1252       type = glsl_get_array_element(type);
1253    }
1254 
1255    unsigned slots = glsl_count_attribute_slots(type, false);
1256    if (variable->data.compact) {
1257       unsigned component_count = variable->data.location_frac + glsl_get_length(type);
1258       slots = DIV_ROUND_UP(component_count, 4);
1259    }
1260 
1261    uint64_t mask = u_bit_consecutive64(0, slots);
1262    return mask;
1263 }
1264 
1265 nir_linked_io_var_info
1266 nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
1267 {
1268    assert(producer);
1269    assert(consumer);
1270 
1271    uint64_t producer_output_mask = 0;
1272    uint64_t producer_patch_output_mask = 0;
1273 
1274    nir_foreach_shader_out_variable(variable, producer) {
1275       uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
1276       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1277 
1278       if (variable->data.patch)
1279          producer_patch_output_mask |= mask << loc;
1280       else
1281          producer_output_mask |= mask << loc;
1282    }
1283 
1284    uint64_t consumer_input_mask = 0;
1285    uint64_t consumer_patch_input_mask = 0;
1286 
1287    nir_foreach_shader_in_variable(variable, consumer) {
1288       uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
1289       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1290 
1291       if (variable->data.patch)
1292          consumer_patch_input_mask |= mask << loc;
1293       else
1294          consumer_input_mask |= mask << loc;
1295    }
1296 
1297    uint64_t io_mask = producer_output_mask | consumer_input_mask;
1298    uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
1299 
1300    nir_foreach_shader_out_variable(variable, producer) {
1301       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1302 
1303       if (variable->data.patch)
1304          variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1305       else
1306          variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1307    }
1308 
1309    nir_foreach_shader_in_variable(variable, consumer) {
1310       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1311 
1312       if (variable->data.patch)
1313          variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1314       else
1315          variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1316    }
1317 
1318    nir_linked_io_var_info result = {
1319       .num_linked_io_vars = util_bitcount64(io_mask),
1320       .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
1321    };
1322 
1323    return result;
1324 }
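
/* A minimal usage sketch (hypothetical driver code, names assumed), e.g. for
 * a TCS/TES pair:
 *
 *    nir_linked_io_var_info io =
 *       nir_assign_linked_io_var_locations(tcs_nir, tes_nir);
 *
 *    tcs_info->num_linked_outputs = io.num_linked_io_vars;
 *    tcs_info->num_linked_patch_outputs = io.num_linked_patch_io_vars;
 *    tes_info->num_linked_inputs = io.num_linked_io_vars;
 *    tes_info->num_linked_patch_inputs = io.num_linked_patch_io_vars;
 */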
1325