1 /*
2  * Copyright © 2018 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include "nir.h"
26 
/* This pass computes for each ssa definition whether it is uniform.
 * That is, whether the variable has the same value for all invocations
 * of the group.
30  *
31  * This divergence analysis pass expects the shader to be in LCSSA-form.
32  *
33  * This algorithm implements "The Simple Divergence Analysis" from
34  * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35  * Divergence Analysis.  ACM Transactions on Programming Languages and Systems (TOPLAS),
36  * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37  */
38 
/* Per-traversal analysis state, threaded through the CF visit functions. */
struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};
58 
59 static bool
60 visit_cf_list(struct exec_list *list, struct divergence_state *state);
61 
62 static bool
visit_alu(nir_alu_instr * instr)63 visit_alu(nir_alu_instr *instr)
64 {
65    if (instr->dest.dest.ssa.divergent)
66       return false;
67 
68    unsigned num_src = nir_op_infos[instr->op].num_inputs;
69 
70    for (unsigned i = 0; i < num_src; i++) {
71       if (instr->src[i].src.ssa->divergent) {
72          instr->dest.dest.ssa.divergent = true;
73          return true;
74       }
75    }
76 
77    return false;
78 }
79 
80 static bool
visit_intrinsic(nir_shader * shader,nir_intrinsic_instr * instr)81 visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
82 {
83    if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
84       return false;
85 
86    if (instr->dest.ssa.divergent)
87       return false;
88 
89    nir_divergence_options options = shader->options->divergence_analysis_options;
90    gl_shader_stage stage = shader->info.stage;
91    bool is_divergent = false;
92    switch (instr->intrinsic) {
93    /* Intrinsics which are always uniform */
94    case nir_intrinsic_shader_clock:
95    case nir_intrinsic_ballot:
96    case nir_intrinsic_read_invocation:
97    case nir_intrinsic_read_first_invocation:
98    case nir_intrinsic_vote_any:
99    case nir_intrinsic_vote_all:
100    case nir_intrinsic_vote_feq:
101    case nir_intrinsic_vote_ieq:
102    case nir_intrinsic_load_work_dim:
103    case nir_intrinsic_load_work_group_id:
104    case nir_intrinsic_load_num_work_groups:
105    case nir_intrinsic_load_local_group_size:
106    case nir_intrinsic_load_subgroup_id:
107    case nir_intrinsic_load_num_subgroups:
108    case nir_intrinsic_load_subgroup_size:
109    case nir_intrinsic_load_subgroup_eq_mask:
110    case nir_intrinsic_load_subgroup_ge_mask:
111    case nir_intrinsic_load_subgroup_gt_mask:
112    case nir_intrinsic_load_subgroup_le_mask:
113    case nir_intrinsic_load_subgroup_lt_mask:
114    case nir_intrinsic_first_invocation:
115    case nir_intrinsic_last_invocation:
116    case nir_intrinsic_load_base_instance:
117    case nir_intrinsic_load_base_vertex:
118    case nir_intrinsic_load_first_vertex:
119    case nir_intrinsic_load_draw_id:
120    case nir_intrinsic_load_is_indexed_draw:
121    case nir_intrinsic_load_viewport_scale:
122    case nir_intrinsic_load_user_clip_plane:
123    case nir_intrinsic_load_viewport_x_scale:
124    case nir_intrinsic_load_viewport_y_scale:
125    case nir_intrinsic_load_viewport_z_scale:
126    case nir_intrinsic_load_viewport_offset:
127    case nir_intrinsic_load_viewport_z_offset:
128    case nir_intrinsic_load_blend_const_color_a_float:
129    case nir_intrinsic_load_blend_const_color_b_float:
130    case nir_intrinsic_load_blend_const_color_g_float:
131    case nir_intrinsic_load_blend_const_color_r_float:
132    case nir_intrinsic_load_blend_const_color_rgba:
133    case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
134    case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
135       is_divergent = false;
136       break;
137 
138    /* Intrinsics with divergence depending on shader stage and hardware */
139    case nir_intrinsic_load_input:
140       is_divergent = instr->src[0].ssa->divergent;
141       if (stage == MESA_SHADER_FRAGMENT)
142          is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
143       else if (stage == MESA_SHADER_TESS_EVAL)
144          is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
145       else
146          is_divergent = true;
147       break;
148    case nir_intrinsic_load_per_vertex_input:
149       is_divergent = instr->src[0].ssa->divergent ||
150                      instr->src[1].ssa->divergent;
151       if (stage == MESA_SHADER_TESS_CTRL)
152          is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
153       if (stage == MESA_SHADER_TESS_EVAL)
154          is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
155       else
156          is_divergent = true;
157       break;
158    case nir_intrinsic_load_input_vertex:
159       is_divergent = instr->src[1].ssa->divergent;
160       assert(stage == MESA_SHADER_FRAGMENT);
161       is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
162       break;
163    case nir_intrinsic_load_output:
164       assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
165       is_divergent = instr->src[0].ssa->divergent;
166       if (stage == MESA_SHADER_TESS_CTRL)
167          is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
168       else
169          is_divergent = true;
170       break;
171    case nir_intrinsic_load_per_vertex_output:
172       assert(stage == MESA_SHADER_TESS_CTRL);
173       is_divergent = instr->src[0].ssa->divergent ||
174                      instr->src[1].ssa->divergent ||
175                      !(options & nir_divergence_single_patch_per_tcs_subgroup);
176       break;
177    case nir_intrinsic_load_layer_id:
178    case nir_intrinsic_load_front_face:
179       assert(stage == MESA_SHADER_FRAGMENT);
180       is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
181       break;
182    case nir_intrinsic_load_view_index:
183       assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
184       if (options & nir_divergence_view_index_uniform)
185          is_divergent = false;
186       else if (stage == MESA_SHADER_FRAGMENT)
187          is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
188       break;
189    case nir_intrinsic_load_fs_input_interp_deltas:
190       assert(stage == MESA_SHADER_FRAGMENT);
191       is_divergent = instr->src[0].ssa->divergent;
192       is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
193       break;
194    case nir_intrinsic_load_primitive_id:
195       if (stage == MESA_SHADER_FRAGMENT)
196          is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
197       else if (stage == MESA_SHADER_TESS_CTRL)
198          is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
199       else if (stage == MESA_SHADER_TESS_EVAL)
200          is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
201       else if (stage == MESA_SHADER_GEOMETRY)
202          is_divergent = true;
203       else
204          unreachable("Invalid stage for load_primitive_id");
205       break;
206    case nir_intrinsic_load_tess_level_inner:
207    case nir_intrinsic_load_tess_level_outer:
208       if (stage == MESA_SHADER_TESS_CTRL)
209          is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
210       else if (stage == MESA_SHADER_TESS_EVAL)
211          is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
212       else
213          unreachable("Invalid stage for load_primitive_tess_level_*");
214       break;
215    case nir_intrinsic_load_patch_vertices_in:
216       if (stage == MESA_SHADER_TESS_EVAL)
217          is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
218       else
219          assert(stage == MESA_SHADER_TESS_CTRL);
220       break;
221 
222    /* Clustered reductions are uniform if cluster_size == subgroup_size or
223     * the source is uniform and the operation is invariant.
224     * Inclusive scans are uniform if
225     * the source is uniform and the operation is invariant
226     */
227    case nir_intrinsic_reduce:
228       if (nir_intrinsic_cluster_size(instr) == 0)
229          return false;
230       /* fallthrough */
231    case nir_intrinsic_inclusive_scan: {
232       nir_op op = nir_intrinsic_reduction_op(instr);
233       is_divergent = instr->src[0].ssa->divergent;
234       if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
235           op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
236           op != nir_op_iand && op != nir_op_ior)
237          is_divergent = true;
238       break;
239    }
240 
241    /* Intrinsics with divergence depending on sources */
242    case nir_intrinsic_ballot_bitfield_extract:
243    case nir_intrinsic_ballot_find_lsb:
244    case nir_intrinsic_ballot_find_msb:
245    case nir_intrinsic_ballot_bit_count_reduce:
246    case nir_intrinsic_shuffle_xor:
247    case nir_intrinsic_shuffle_up:
248    case nir_intrinsic_shuffle_down:
249    case nir_intrinsic_quad_broadcast:
250    case nir_intrinsic_quad_swap_horizontal:
251    case nir_intrinsic_quad_swap_vertical:
252    case nir_intrinsic_quad_swap_diagonal:
253    case nir_intrinsic_load_deref:
254    case nir_intrinsic_load_ubo:
255    case nir_intrinsic_load_ssbo:
256    case nir_intrinsic_load_shared:
257    case nir_intrinsic_load_global:
258    case nir_intrinsic_load_global_constant:
259    case nir_intrinsic_load_uniform:
260    case nir_intrinsic_load_push_constant:
261    case nir_intrinsic_load_constant:
262    case nir_intrinsic_load_sample_pos_from_id:
263    case nir_intrinsic_load_kernel_input:
264    case nir_intrinsic_image_load:
265    case nir_intrinsic_image_deref_load:
266    case nir_intrinsic_bindless_image_load:
267    case nir_intrinsic_image_samples:
268    case nir_intrinsic_image_deref_samples:
269    case nir_intrinsic_bindless_image_samples:
270    case nir_intrinsic_get_ssbo_size:
271    case nir_intrinsic_image_size:
272    case nir_intrinsic_image_deref_size:
273    case nir_intrinsic_bindless_image_size:
274    case nir_intrinsic_copy_deref:
275    case nir_intrinsic_deref_buffer_array_length:
276    case nir_intrinsic_vulkan_resource_index:
277    case nir_intrinsic_vulkan_resource_reindex:
278    case nir_intrinsic_load_vulkan_descriptor:
279    case nir_intrinsic_atomic_counter_read:
280    case nir_intrinsic_atomic_counter_read_deref:
281    case nir_intrinsic_quad_swizzle_amd:
282    case nir_intrinsic_masked_swizzle_amd: {
283       unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
284       for (unsigned i = 0; i < num_srcs; i++) {
285          if (instr->src[i].ssa->divergent) {
286             is_divergent = true;
287             break;
288          }
289       }
290       break;
291    }
292 
293    case nir_intrinsic_shuffle:
294       is_divergent = instr->src[0].ssa->divergent &&
295                      instr->src[1].ssa->divergent;
296       break;
297 
298    /* Intrinsics which are always divergent */
299    case nir_intrinsic_load_color0:
300    case nir_intrinsic_load_color1:
301    case nir_intrinsic_load_param:
302    case nir_intrinsic_load_sample_id:
303    case nir_intrinsic_load_sample_id_no_per_sample:
304    case nir_intrinsic_load_sample_mask_in:
305    case nir_intrinsic_load_interpolated_input:
306    case nir_intrinsic_load_barycentric_pixel:
307    case nir_intrinsic_load_barycentric_centroid:
308    case nir_intrinsic_load_barycentric_sample:
309    case nir_intrinsic_load_barycentric_model:
310    case nir_intrinsic_load_barycentric_at_sample:
311    case nir_intrinsic_load_barycentric_at_offset:
312    case nir_intrinsic_interp_deref_at_offset:
313    case nir_intrinsic_interp_deref_at_sample:
314    case nir_intrinsic_interp_deref_at_centroid:
315    case nir_intrinsic_interp_deref_at_vertex:
316    case nir_intrinsic_load_tess_coord:
317    case nir_intrinsic_load_point_coord:
318    case nir_intrinsic_load_line_coord:
319    case nir_intrinsic_load_frag_coord:
320    case nir_intrinsic_load_sample_pos:
321    case nir_intrinsic_load_vertex_id_zero_base:
322    case nir_intrinsic_load_vertex_id:
323    case nir_intrinsic_load_instance_id:
324    case nir_intrinsic_load_invocation_id:
325    case nir_intrinsic_load_local_invocation_id:
326    case nir_intrinsic_load_local_invocation_index:
327    case nir_intrinsic_load_global_invocation_id:
328    case nir_intrinsic_load_global_invocation_index:
329    case nir_intrinsic_load_subgroup_invocation:
330    case nir_intrinsic_load_helper_invocation:
331    case nir_intrinsic_is_helper_invocation:
332    case nir_intrinsic_load_scratch:
333    case nir_intrinsic_deref_atomic_add:
334    case nir_intrinsic_deref_atomic_imin:
335    case nir_intrinsic_deref_atomic_umin:
336    case nir_intrinsic_deref_atomic_imax:
337    case nir_intrinsic_deref_atomic_umax:
338    case nir_intrinsic_deref_atomic_and:
339    case nir_intrinsic_deref_atomic_or:
340    case nir_intrinsic_deref_atomic_xor:
341    case nir_intrinsic_deref_atomic_exchange:
342    case nir_intrinsic_deref_atomic_comp_swap:
343    case nir_intrinsic_deref_atomic_fadd:
344    case nir_intrinsic_deref_atomic_fmin:
345    case nir_intrinsic_deref_atomic_fmax:
346    case nir_intrinsic_deref_atomic_fcomp_swap:
347    case nir_intrinsic_ssbo_atomic_add:
348    case nir_intrinsic_ssbo_atomic_imin:
349    case nir_intrinsic_ssbo_atomic_umin:
350    case nir_intrinsic_ssbo_atomic_imax:
351    case nir_intrinsic_ssbo_atomic_umax:
352    case nir_intrinsic_ssbo_atomic_and:
353    case nir_intrinsic_ssbo_atomic_or:
354    case nir_intrinsic_ssbo_atomic_xor:
355    case nir_intrinsic_ssbo_atomic_exchange:
356    case nir_intrinsic_ssbo_atomic_comp_swap:
357    case nir_intrinsic_ssbo_atomic_fadd:
358    case nir_intrinsic_ssbo_atomic_fmax:
359    case nir_intrinsic_ssbo_atomic_fmin:
360    case nir_intrinsic_ssbo_atomic_fcomp_swap:
361    case nir_intrinsic_image_deref_atomic_add:
362    case nir_intrinsic_image_deref_atomic_imin:
363    case nir_intrinsic_image_deref_atomic_umin:
364    case nir_intrinsic_image_deref_atomic_imax:
365    case nir_intrinsic_image_deref_atomic_umax:
366    case nir_intrinsic_image_deref_atomic_and:
367    case nir_intrinsic_image_deref_atomic_or:
368    case nir_intrinsic_image_deref_atomic_xor:
369    case nir_intrinsic_image_deref_atomic_exchange:
370    case nir_intrinsic_image_deref_atomic_comp_swap:
371    case nir_intrinsic_image_deref_atomic_fadd:
372    case nir_intrinsic_image_atomic_add:
373    case nir_intrinsic_image_atomic_imin:
374    case nir_intrinsic_image_atomic_umin:
375    case nir_intrinsic_image_atomic_imax:
376    case nir_intrinsic_image_atomic_umax:
377    case nir_intrinsic_image_atomic_and:
378    case nir_intrinsic_image_atomic_or:
379    case nir_intrinsic_image_atomic_xor:
380    case nir_intrinsic_image_atomic_exchange:
381    case nir_intrinsic_image_atomic_comp_swap:
382    case nir_intrinsic_image_atomic_fadd:
383    case nir_intrinsic_bindless_image_atomic_add:
384    case nir_intrinsic_bindless_image_atomic_imin:
385    case nir_intrinsic_bindless_image_atomic_umin:
386    case nir_intrinsic_bindless_image_atomic_imax:
387    case nir_intrinsic_bindless_image_atomic_umax:
388    case nir_intrinsic_bindless_image_atomic_and:
389    case nir_intrinsic_bindless_image_atomic_or:
390    case nir_intrinsic_bindless_image_atomic_xor:
391    case nir_intrinsic_bindless_image_atomic_exchange:
392    case nir_intrinsic_bindless_image_atomic_comp_swap:
393    case nir_intrinsic_bindless_image_atomic_fadd:
394    case nir_intrinsic_shared_atomic_add:
395    case nir_intrinsic_shared_atomic_imin:
396    case nir_intrinsic_shared_atomic_umin:
397    case nir_intrinsic_shared_atomic_imax:
398    case nir_intrinsic_shared_atomic_umax:
399    case nir_intrinsic_shared_atomic_and:
400    case nir_intrinsic_shared_atomic_or:
401    case nir_intrinsic_shared_atomic_xor:
402    case nir_intrinsic_shared_atomic_exchange:
403    case nir_intrinsic_shared_atomic_comp_swap:
404    case nir_intrinsic_shared_atomic_fadd:
405    case nir_intrinsic_shared_atomic_fmin:
406    case nir_intrinsic_shared_atomic_fmax:
407    case nir_intrinsic_shared_atomic_fcomp_swap:
408    case nir_intrinsic_global_atomic_add:
409    case nir_intrinsic_global_atomic_imin:
410    case nir_intrinsic_global_atomic_umin:
411    case nir_intrinsic_global_atomic_imax:
412    case nir_intrinsic_global_atomic_umax:
413    case nir_intrinsic_global_atomic_and:
414    case nir_intrinsic_global_atomic_or:
415    case nir_intrinsic_global_atomic_xor:
416    case nir_intrinsic_global_atomic_exchange:
417    case nir_intrinsic_global_atomic_comp_swap:
418    case nir_intrinsic_global_atomic_fadd:
419    case nir_intrinsic_global_atomic_fmin:
420    case nir_intrinsic_global_atomic_fmax:
421    case nir_intrinsic_global_atomic_fcomp_swap:
422    case nir_intrinsic_atomic_counter_add:
423    case nir_intrinsic_atomic_counter_min:
424    case nir_intrinsic_atomic_counter_max:
425    case nir_intrinsic_atomic_counter_and:
426    case nir_intrinsic_atomic_counter_or:
427    case nir_intrinsic_atomic_counter_xor:
428    case nir_intrinsic_atomic_counter_inc:
429    case nir_intrinsic_atomic_counter_pre_dec:
430    case nir_intrinsic_atomic_counter_post_dec:
431    case nir_intrinsic_atomic_counter_exchange:
432    case nir_intrinsic_atomic_counter_comp_swap:
433    case nir_intrinsic_atomic_counter_add_deref:
434    case nir_intrinsic_atomic_counter_min_deref:
435    case nir_intrinsic_atomic_counter_max_deref:
436    case nir_intrinsic_atomic_counter_and_deref:
437    case nir_intrinsic_atomic_counter_or_deref:
438    case nir_intrinsic_atomic_counter_xor_deref:
439    case nir_intrinsic_atomic_counter_inc_deref:
440    case nir_intrinsic_atomic_counter_pre_dec_deref:
441    case nir_intrinsic_atomic_counter_post_dec_deref:
442    case nir_intrinsic_atomic_counter_exchange_deref:
443    case nir_intrinsic_atomic_counter_comp_swap_deref:
444    case nir_intrinsic_exclusive_scan:
445    case nir_intrinsic_ballot_bit_count_exclusive:
446    case nir_intrinsic_ballot_bit_count_inclusive:
447    case nir_intrinsic_write_invocation_amd:
448    case nir_intrinsic_mbcnt_amd:
449    case nir_intrinsic_elect:
450       is_divergent = true;
451       break;
452 
453    default:
454 #ifdef NDEBUG
455       is_divergent = true;
456       break;
457 #else
458       nir_print_instr(&instr->instr, stderr);
459       unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
460 #endif
461    }
462 
463    instr->dest.ssa.divergent = is_divergent;
464    return is_divergent;
465 }
466 
467 static bool
visit_tex(nir_tex_instr * instr)468 visit_tex(nir_tex_instr *instr)
469 {
470    if (instr->dest.ssa.divergent)
471       return false;
472 
473    bool is_divergent = false;
474 
475    for (unsigned i = 0; i < instr->num_srcs; i++) {
476       switch (instr->src[i].src_type) {
477       case nir_tex_src_sampler_deref:
478       case nir_tex_src_sampler_handle:
479       case nir_tex_src_sampler_offset:
480          is_divergent |= instr->src[i].src.ssa->divergent &&
481                          instr->sampler_non_uniform;
482          break;
483       case nir_tex_src_texture_deref:
484       case nir_tex_src_texture_handle:
485       case nir_tex_src_texture_offset:
486          is_divergent |= instr->src[i].src.ssa->divergent &&
487                          instr->texture_non_uniform;
488          break;
489       default:
490          is_divergent |= instr->src[i].src.ssa->divergent;
491          break;
492       }
493    }
494 
495    instr->dest.ssa.divergent = is_divergent;
496    return is_divergent;
497 }
498 
/* Constants have the same value in every invocation: always uniform. */
static bool
visit_load_const(nir_load_const_instr *instr)
{
   return false;
}
504 
/* Undefined values may be treated as uniform: never marked divergent. */
static bool
visit_ssa_undef(nir_ssa_undef_instr *instr)
{
   return false;
}
510 
511 static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)512 nir_variable_mode_is_uniform(nir_variable_mode mode) {
513    switch (mode) {
514    case nir_var_uniform:
515    case nir_var_mem_ubo:
516    case nir_var_mem_ssbo:
517    case nir_var_mem_shared:
518    case nir_var_mem_global:
519       return true;
520    default:
521       return false;
522    }
523 }
524 
525 static bool
nir_variable_is_uniform(nir_shader * shader,nir_variable * var)526 nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
527 {
528    if (nir_variable_mode_is_uniform(var->data.mode))
529       return true;
530 
531    nir_divergence_options options = shader->options->divergence_analysis_options;
532    gl_shader_stage stage = shader->info.stage;
533 
534    if (stage == MESA_SHADER_FRAGMENT &&
535        (options & nir_divergence_single_prim_per_subgroup) &&
536        var->data.mode == nir_var_shader_in &&
537        var->data.interpolation == INTERP_MODE_FLAT)
538       return true;
539 
540    if (stage == MESA_SHADER_TESS_CTRL &&
541        (options & nir_divergence_single_patch_per_tcs_subgroup) &&
542        var->data.mode == nir_var_shader_out && var->data.patch)
543       return true;
544 
545    if (stage == MESA_SHADER_TESS_EVAL &&
546        (options & nir_divergence_single_patch_per_tes_subgroup) &&
547        var->data.mode == nir_var_shader_in && var->data.patch)
548       return true;
549 
550    return false;
551 }
552 
553 static bool
visit_deref(nir_shader * shader,nir_deref_instr * deref)554 visit_deref(nir_shader *shader, nir_deref_instr *deref)
555 {
556    if (deref->dest.ssa.divergent)
557       return false;
558 
559    bool is_divergent = false;
560    switch (deref->deref_type) {
561    case nir_deref_type_var:
562       is_divergent = !nir_variable_is_uniform(shader, deref->var);
563       break;
564    case nir_deref_type_array:
565    case nir_deref_type_ptr_as_array:
566       is_divergent = deref->arr.index.ssa->divergent;
567       /* fallthrough */
568    case nir_deref_type_struct:
569    case nir_deref_type_array_wildcard:
570       is_divergent |= deref->parent.ssa->divergent;
571       break;
572    case nir_deref_type_cast:
573       is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
574                      deref->parent.ssa->divergent;
575       break;
576    }
577 
578    deref->dest.ssa.divergent = is_divergent;
579    return is_divergent;
580 }
581 
582 static bool
visit_jump(nir_jump_instr * jump,struct divergence_state * state)583 visit_jump(nir_jump_instr *jump, struct divergence_state *state)
584 {
585    switch (jump->type) {
586    case nir_jump_continue:
587       if (state->divergent_loop_continue)
588          return false;
589       if (state->divergent_loop_cf)
590          state->divergent_loop_continue = true;
591       return state->divergent_loop_continue;
592    case nir_jump_break:
593       if (state->divergent_loop_break)
594          return false;
595       if (state->divergent_loop_cf)
596          state->divergent_loop_break = true;
597       return state->divergent_loop_break;
598    case nir_jump_return:
599       unreachable("NIR divergence analysis: Unsupported return instruction.");
600       break;
601    case nir_jump_goto:
602    case nir_jump_goto_if:
603       unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
604       break;
605    }
606    return false;
607 }
608 
/* nir_foreach_ssa_def callback: reset a def to uniform before the first
 * analysis pass over its block. */
static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}
615 
/* Dispatch divergence computation by instruction type.
 * Phis are handled by the branch/loop visitors, and jumps by visit_jump,
 * so neither may reach this function. Returns true on progress. */
static bool
update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr));
   case nir_instr_type_load_const:
      return visit_load_const(nir_instr_as_load_const(instr));
   case nir_instr_type_ssa_undef:
      return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
   case nir_instr_type_deref:
      return visit_deref(shader, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_call:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}
640 
641 static bool
visit_block(nir_block * block,struct divergence_state * state)642 visit_block(nir_block *block, struct divergence_state *state)
643 {
644    bool has_changed = false;
645 
646    nir_foreach_instr(instr, block) {
647       /* phis are handled when processing the branches */
648       if (instr->type == nir_instr_type_phi)
649          continue;
650 
651       if (state->first_visit)
652          nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
653 
654       if (instr->type == nir_instr_type_jump)
655          has_changed |= visit_jump(nir_instr_as_jump(instr), state);
656       else
657          has_changed |= update_instr_divergence(state->shader, instr);
658    }
659 
660    return has_changed;
661 }
662 
663 /* There are 3 types of phi instructions:
664  * (1) gamma: represent the joining point of different paths
665  *     created by an “if-then-else” branch.
666  *     The resulting value is divergent if the branch condition
667  *     or any of the source values is divergent. */
668 static bool
visit_if_merge_phi(nir_phi_instr * phi,bool if_cond_divergent)669 visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
670 {
671    if (phi->dest.ssa.divergent)
672       return false;
673 
674    unsigned defined_srcs = 0;
675    nir_foreach_phi_src(src, phi) {
676       /* if any source value is divergent, the resulting value is divergent */
677       if (src->src.ssa->divergent) {
678          phi->dest.ssa.divergent = true;
679          return true;
680       }
681       if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
682          defined_srcs++;
683       }
684    }
685 
686    /* if the condition is divergent and two sources defined, the definition is divergent */
687    if (defined_srcs > 1 && if_cond_divergent) {
688       phi->dest.ssa.divergent = true;
689       return true;
690    }
691 
692    return false;
693 }
694 
695 /* There are 3 types of phi instructions:
696  * (2) mu: which only exist at loop headers,
697  *     merge initial and loop-carried values.
698  *     The resulting value is divergent if any source value
699  *     is divergent or a divergent loop continue condition
700  *     is associated with a different ssa-def. */
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   /* Divergence is monotonic across fixed-point iterations: done. */
   if (phi->dest.ssa.divergent)
      return false;

   nir_ssa_def* same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef)
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         /* A divergent continue can mix distinct loop-carried defs. */
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
735 
736 /* There are 3 types of phi instructions:
737  * (3) eta: represent values that leave a loop.
738  *     The resulting value is divergent if the source value is divergent
739  *     or any loop exit condition is divergent for a value which is
740  *     not loop-invariant.
741  *     (note: there should be no phi for loop-invariant variables.) */
742 static bool
visit_loop_exit_phi(nir_phi_instr * phi,bool divergent_break)743 visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
744 {
745    if (phi->dest.ssa.divergent)
746       return false;
747 
748    if (divergent_break) {
749       phi->dest.ssa.divergent = true;
750       return true;
751    }
752 
753    /* if any source value is divergent, the resulting value is divergent */
754    nir_foreach_phi_src(src, phi) {
755       if (src->src.ssa->divergent) {
756          phi->dest.ssa.divergent = true;
757          return true;
758       }
759    }
760 
761    return false;
762 }
763 
/* Analyze an if/else construct: visit both legs with copies of the loop
 * state, compute the merge phis after the if, and join the legs' continue
 * and break information back into the caller's state. */
static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   /* A divergent condition makes control flow inside either leg divergent. */
   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      /* First visit: reset before computing (defs default to unknown). */
      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
                                     if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop-body
    * which means that a following break might be taken by some invocations, only */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}
801 
/* Visits a loop and computes divergence for its header phis, its body and
 * the phis after the loop.  Header phis are first seeded from the value
 * entering through the preheader only; the body is then re-visited until a
 * fixed point is reached, since newly-divergent loop-carried values or a
 * divergent continue can make further header phis divergent.
 * Returns true if any divergence information changed. */
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      /* Divergence is monotonic: on re-runs, keep already-divergent phis. */
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      /* Seed the phi from the preheader source; loop-carried sources are
       * accounted for by the fixed-point iteration below. */
      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state: the nested loop starts with uniform control flow,
    * independent of any divergence in the enclosing CF. */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      /* CF divergence is re-derived from scratch on each iteration;
       * subsequent iterations are no longer the first visit. */
      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop: these are eta nodes, divergent if the
    * loop had a divergent break or any incoming value is divergent. */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   return progress;
}
867 
868 static bool
visit_cf_list(struct exec_list * list,struct divergence_state * state)869 visit_cf_list(struct exec_list *list, struct divergence_state *state)
870 {
871    bool has_changed = false;
872 
873    foreach_list_typed(nir_cf_node, node, node, list) {
874       switch (node->type) {
875       case nir_cf_node_block:
876          has_changed |= visit_block(nir_cf_node_as_block(node), state);
877          break;
878       case nir_cf_node_if:
879          has_changed |= visit_if(nir_cf_node_as_if(node), state);
880          break;
881       case nir_cf_node_loop:
882          has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
883          break;
884       case nir_cf_node_function:
885          unreachable("NIR divergence analysis: Unsupported cf_node type.");
886       }
887    }
888 
889    return has_changed;
890 }
891 
892 void
nir_divergence_analysis(nir_shader * shader)893 nir_divergence_analysis(nir_shader *shader)
894 {
895    struct divergence_state state = {
896       .stage = shader->info.stage,
897       .shader = shader,
898       .divergent_loop_cf = false,
899       .divergent_loop_continue = false,
900       .divergent_loop_break = false,
901       .first_visit = true,
902    };
903 
904    visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
905 }
906 
nir_update_instr_divergence(nir_shader * shader,nir_instr * instr)907 bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
908 {
909    nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
910 
911    if (instr->type == nir_instr_type_phi) {
912       nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
913       /* can only update gamma/if phis */
914       if (!prev || prev->type != nir_cf_node_if)
915          return false;
916 
917       nir_if *nif = nir_cf_node_as_if(prev);
918 
919       visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
920       return true;
921    }
922 
923    update_instr_divergence(shader, instr);
924    return true;
925 }
926 
927