1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "nir.h"
26
27 /* This pass computes for each ssa definition if it is uniform.
28 * That is, the variable has the same value for all invocations
29 * of the group.
30 *
31 * This divergence analysis pass expects the shader to be in LCSSA-form.
32 *
33 * This algorithm implements "The Simple Divergence Analysis" from
34 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
36 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37 */
38
/* Analysis state threaded through the CF-graph walk. Copied by value at
 * control-flow splits (if-legs, loop bodies) and merged back by the callers. */
struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};
58
59 static bool
60 visit_cf_list(struct exec_list *list, struct divergence_state *state);
61
62 static bool
visit_alu(nir_alu_instr * instr)63 visit_alu(nir_alu_instr *instr)
64 {
65 if (instr->dest.dest.ssa.divergent)
66 return false;
67
68 unsigned num_src = nir_op_infos[instr->op].num_inputs;
69
70 for (unsigned i = 0; i < num_src; i++) {
71 if (instr->src[i].src.ssa->divergent) {
72 instr->dest.dest.ssa.divergent = true;
73 return true;
74 }
75 }
76
77 return false;
78 }
79
80 static bool
visit_intrinsic(nir_shader * shader,nir_intrinsic_instr * instr)81 visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
82 {
83 if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
84 return false;
85
86 if (instr->dest.ssa.divergent)
87 return false;
88
89 nir_divergence_options options = shader->options->divergence_analysis_options;
90 gl_shader_stage stage = shader->info.stage;
91 bool is_divergent = false;
92 switch (instr->intrinsic) {
93 /* Intrinsics which are always uniform */
94 case nir_intrinsic_shader_clock:
95 case nir_intrinsic_ballot:
96 case nir_intrinsic_read_invocation:
97 case nir_intrinsic_read_first_invocation:
98 case nir_intrinsic_vote_any:
99 case nir_intrinsic_vote_all:
100 case nir_intrinsic_vote_feq:
101 case nir_intrinsic_vote_ieq:
102 case nir_intrinsic_load_work_dim:
103 case nir_intrinsic_load_work_group_id:
104 case nir_intrinsic_load_num_work_groups:
105 case nir_intrinsic_load_local_group_size:
106 case nir_intrinsic_load_subgroup_id:
107 case nir_intrinsic_load_num_subgroups:
108 case nir_intrinsic_load_subgroup_size:
109 case nir_intrinsic_load_subgroup_eq_mask:
110 case nir_intrinsic_load_subgroup_ge_mask:
111 case nir_intrinsic_load_subgroup_gt_mask:
112 case nir_intrinsic_load_subgroup_le_mask:
113 case nir_intrinsic_load_subgroup_lt_mask:
114 case nir_intrinsic_first_invocation:
115 case nir_intrinsic_last_invocation:
116 case nir_intrinsic_load_base_instance:
117 case nir_intrinsic_load_base_vertex:
118 case nir_intrinsic_load_first_vertex:
119 case nir_intrinsic_load_draw_id:
120 case nir_intrinsic_load_is_indexed_draw:
121 case nir_intrinsic_load_viewport_scale:
122 case nir_intrinsic_load_user_clip_plane:
123 case nir_intrinsic_load_viewport_x_scale:
124 case nir_intrinsic_load_viewport_y_scale:
125 case nir_intrinsic_load_viewport_z_scale:
126 case nir_intrinsic_load_viewport_offset:
127 case nir_intrinsic_load_viewport_z_offset:
128 case nir_intrinsic_load_blend_const_color_a_float:
129 case nir_intrinsic_load_blend_const_color_b_float:
130 case nir_intrinsic_load_blend_const_color_g_float:
131 case nir_intrinsic_load_blend_const_color_r_float:
132 case nir_intrinsic_load_blend_const_color_rgba:
133 case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
134 case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
135 is_divergent = false;
136 break;
137
138 /* Intrinsics with divergence depending on shader stage and hardware */
139 case nir_intrinsic_load_input:
140 is_divergent = instr->src[0].ssa->divergent;
141 if (stage == MESA_SHADER_FRAGMENT)
142 is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
143 else if (stage == MESA_SHADER_TESS_EVAL)
144 is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
145 else
146 is_divergent = true;
147 break;
148 case nir_intrinsic_load_per_vertex_input:
149 is_divergent = instr->src[0].ssa->divergent ||
150 instr->src[1].ssa->divergent;
151 if (stage == MESA_SHADER_TESS_CTRL)
152 is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
153 if (stage == MESA_SHADER_TESS_EVAL)
154 is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
155 else
156 is_divergent = true;
157 break;
158 case nir_intrinsic_load_input_vertex:
159 is_divergent = instr->src[1].ssa->divergent;
160 assert(stage == MESA_SHADER_FRAGMENT);
161 is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
162 break;
163 case nir_intrinsic_load_output:
164 assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
165 is_divergent = instr->src[0].ssa->divergent;
166 if (stage == MESA_SHADER_TESS_CTRL)
167 is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
168 else
169 is_divergent = true;
170 break;
171 case nir_intrinsic_load_per_vertex_output:
172 assert(stage == MESA_SHADER_TESS_CTRL);
173 is_divergent = instr->src[0].ssa->divergent ||
174 instr->src[1].ssa->divergent ||
175 !(options & nir_divergence_single_patch_per_tcs_subgroup);
176 break;
177 case nir_intrinsic_load_layer_id:
178 case nir_intrinsic_load_front_face:
179 assert(stage == MESA_SHADER_FRAGMENT);
180 is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
181 break;
182 case nir_intrinsic_load_view_index:
183 assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
184 if (options & nir_divergence_view_index_uniform)
185 is_divergent = false;
186 else if (stage == MESA_SHADER_FRAGMENT)
187 is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
188 break;
189 case nir_intrinsic_load_fs_input_interp_deltas:
190 assert(stage == MESA_SHADER_FRAGMENT);
191 is_divergent = instr->src[0].ssa->divergent;
192 is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
193 break;
194 case nir_intrinsic_load_primitive_id:
195 if (stage == MESA_SHADER_FRAGMENT)
196 is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
197 else if (stage == MESA_SHADER_TESS_CTRL)
198 is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
199 else if (stage == MESA_SHADER_TESS_EVAL)
200 is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
201 else if (stage == MESA_SHADER_GEOMETRY)
202 is_divergent = true;
203 else
204 unreachable("Invalid stage for load_primitive_id");
205 break;
206 case nir_intrinsic_load_tess_level_inner:
207 case nir_intrinsic_load_tess_level_outer:
208 if (stage == MESA_SHADER_TESS_CTRL)
209 is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
210 else if (stage == MESA_SHADER_TESS_EVAL)
211 is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
212 else
213 unreachable("Invalid stage for load_primitive_tess_level_*");
214 break;
215 case nir_intrinsic_load_patch_vertices_in:
216 if (stage == MESA_SHADER_TESS_EVAL)
217 is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
218 else
219 assert(stage == MESA_SHADER_TESS_CTRL);
220 break;
221
222 /* Clustered reductions are uniform if cluster_size == subgroup_size or
223 * the source is uniform and the operation is invariant.
224 * Inclusive scans are uniform if
225 * the source is uniform and the operation is invariant
226 */
227 case nir_intrinsic_reduce:
228 if (nir_intrinsic_cluster_size(instr) == 0)
229 return false;
230 /* fallthrough */
231 case nir_intrinsic_inclusive_scan: {
232 nir_op op = nir_intrinsic_reduction_op(instr);
233 is_divergent = instr->src[0].ssa->divergent;
234 if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
235 op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
236 op != nir_op_iand && op != nir_op_ior)
237 is_divergent = true;
238 break;
239 }
240
241 /* Intrinsics with divergence depending on sources */
242 case nir_intrinsic_ballot_bitfield_extract:
243 case nir_intrinsic_ballot_find_lsb:
244 case nir_intrinsic_ballot_find_msb:
245 case nir_intrinsic_ballot_bit_count_reduce:
246 case nir_intrinsic_shuffle_xor:
247 case nir_intrinsic_shuffle_up:
248 case nir_intrinsic_shuffle_down:
249 case nir_intrinsic_quad_broadcast:
250 case nir_intrinsic_quad_swap_horizontal:
251 case nir_intrinsic_quad_swap_vertical:
252 case nir_intrinsic_quad_swap_diagonal:
253 case nir_intrinsic_load_deref:
254 case nir_intrinsic_load_ubo:
255 case nir_intrinsic_load_ssbo:
256 case nir_intrinsic_load_shared:
257 case nir_intrinsic_load_global:
258 case nir_intrinsic_load_global_constant:
259 case nir_intrinsic_load_uniform:
260 case nir_intrinsic_load_push_constant:
261 case nir_intrinsic_load_constant:
262 case nir_intrinsic_load_sample_pos_from_id:
263 case nir_intrinsic_load_kernel_input:
264 case nir_intrinsic_image_load:
265 case nir_intrinsic_image_deref_load:
266 case nir_intrinsic_bindless_image_load:
267 case nir_intrinsic_image_samples:
268 case nir_intrinsic_image_deref_samples:
269 case nir_intrinsic_bindless_image_samples:
270 case nir_intrinsic_get_ssbo_size:
271 case nir_intrinsic_image_size:
272 case nir_intrinsic_image_deref_size:
273 case nir_intrinsic_bindless_image_size:
274 case nir_intrinsic_copy_deref:
275 case nir_intrinsic_deref_buffer_array_length:
276 case nir_intrinsic_vulkan_resource_index:
277 case nir_intrinsic_vulkan_resource_reindex:
278 case nir_intrinsic_load_vulkan_descriptor:
279 case nir_intrinsic_atomic_counter_read:
280 case nir_intrinsic_atomic_counter_read_deref:
281 case nir_intrinsic_quad_swizzle_amd:
282 case nir_intrinsic_masked_swizzle_amd: {
283 unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
284 for (unsigned i = 0; i < num_srcs; i++) {
285 if (instr->src[i].ssa->divergent) {
286 is_divergent = true;
287 break;
288 }
289 }
290 break;
291 }
292
293 case nir_intrinsic_shuffle:
294 is_divergent = instr->src[0].ssa->divergent &&
295 instr->src[1].ssa->divergent;
296 break;
297
298 /* Intrinsics which are always divergent */
299 case nir_intrinsic_load_color0:
300 case nir_intrinsic_load_color1:
301 case nir_intrinsic_load_param:
302 case nir_intrinsic_load_sample_id:
303 case nir_intrinsic_load_sample_id_no_per_sample:
304 case nir_intrinsic_load_sample_mask_in:
305 case nir_intrinsic_load_interpolated_input:
306 case nir_intrinsic_load_barycentric_pixel:
307 case nir_intrinsic_load_barycentric_centroid:
308 case nir_intrinsic_load_barycentric_sample:
309 case nir_intrinsic_load_barycentric_model:
310 case nir_intrinsic_load_barycentric_at_sample:
311 case nir_intrinsic_load_barycentric_at_offset:
312 case nir_intrinsic_interp_deref_at_offset:
313 case nir_intrinsic_interp_deref_at_sample:
314 case nir_intrinsic_interp_deref_at_centroid:
315 case nir_intrinsic_interp_deref_at_vertex:
316 case nir_intrinsic_load_tess_coord:
317 case nir_intrinsic_load_point_coord:
318 case nir_intrinsic_load_line_coord:
319 case nir_intrinsic_load_frag_coord:
320 case nir_intrinsic_load_sample_pos:
321 case nir_intrinsic_load_vertex_id_zero_base:
322 case nir_intrinsic_load_vertex_id:
323 case nir_intrinsic_load_instance_id:
324 case nir_intrinsic_load_invocation_id:
325 case nir_intrinsic_load_local_invocation_id:
326 case nir_intrinsic_load_local_invocation_index:
327 case nir_intrinsic_load_global_invocation_id:
328 case nir_intrinsic_load_global_invocation_index:
329 case nir_intrinsic_load_subgroup_invocation:
330 case nir_intrinsic_load_helper_invocation:
331 case nir_intrinsic_is_helper_invocation:
332 case nir_intrinsic_load_scratch:
333 case nir_intrinsic_deref_atomic_add:
334 case nir_intrinsic_deref_atomic_imin:
335 case nir_intrinsic_deref_atomic_umin:
336 case nir_intrinsic_deref_atomic_imax:
337 case nir_intrinsic_deref_atomic_umax:
338 case nir_intrinsic_deref_atomic_and:
339 case nir_intrinsic_deref_atomic_or:
340 case nir_intrinsic_deref_atomic_xor:
341 case nir_intrinsic_deref_atomic_exchange:
342 case nir_intrinsic_deref_atomic_comp_swap:
343 case nir_intrinsic_deref_atomic_fadd:
344 case nir_intrinsic_deref_atomic_fmin:
345 case nir_intrinsic_deref_atomic_fmax:
346 case nir_intrinsic_deref_atomic_fcomp_swap:
347 case nir_intrinsic_ssbo_atomic_add:
348 case nir_intrinsic_ssbo_atomic_imin:
349 case nir_intrinsic_ssbo_atomic_umin:
350 case nir_intrinsic_ssbo_atomic_imax:
351 case nir_intrinsic_ssbo_atomic_umax:
352 case nir_intrinsic_ssbo_atomic_and:
353 case nir_intrinsic_ssbo_atomic_or:
354 case nir_intrinsic_ssbo_atomic_xor:
355 case nir_intrinsic_ssbo_atomic_exchange:
356 case nir_intrinsic_ssbo_atomic_comp_swap:
357 case nir_intrinsic_ssbo_atomic_fadd:
358 case nir_intrinsic_ssbo_atomic_fmax:
359 case nir_intrinsic_ssbo_atomic_fmin:
360 case nir_intrinsic_ssbo_atomic_fcomp_swap:
361 case nir_intrinsic_image_deref_atomic_add:
362 case nir_intrinsic_image_deref_atomic_imin:
363 case nir_intrinsic_image_deref_atomic_umin:
364 case nir_intrinsic_image_deref_atomic_imax:
365 case nir_intrinsic_image_deref_atomic_umax:
366 case nir_intrinsic_image_deref_atomic_and:
367 case nir_intrinsic_image_deref_atomic_or:
368 case nir_intrinsic_image_deref_atomic_xor:
369 case nir_intrinsic_image_deref_atomic_exchange:
370 case nir_intrinsic_image_deref_atomic_comp_swap:
371 case nir_intrinsic_image_deref_atomic_fadd:
372 case nir_intrinsic_image_atomic_add:
373 case nir_intrinsic_image_atomic_imin:
374 case nir_intrinsic_image_atomic_umin:
375 case nir_intrinsic_image_atomic_imax:
376 case nir_intrinsic_image_atomic_umax:
377 case nir_intrinsic_image_atomic_and:
378 case nir_intrinsic_image_atomic_or:
379 case nir_intrinsic_image_atomic_xor:
380 case nir_intrinsic_image_atomic_exchange:
381 case nir_intrinsic_image_atomic_comp_swap:
382 case nir_intrinsic_image_atomic_fadd:
383 case nir_intrinsic_bindless_image_atomic_add:
384 case nir_intrinsic_bindless_image_atomic_imin:
385 case nir_intrinsic_bindless_image_atomic_umin:
386 case nir_intrinsic_bindless_image_atomic_imax:
387 case nir_intrinsic_bindless_image_atomic_umax:
388 case nir_intrinsic_bindless_image_atomic_and:
389 case nir_intrinsic_bindless_image_atomic_or:
390 case nir_intrinsic_bindless_image_atomic_xor:
391 case nir_intrinsic_bindless_image_atomic_exchange:
392 case nir_intrinsic_bindless_image_atomic_comp_swap:
393 case nir_intrinsic_bindless_image_atomic_fadd:
394 case nir_intrinsic_shared_atomic_add:
395 case nir_intrinsic_shared_atomic_imin:
396 case nir_intrinsic_shared_atomic_umin:
397 case nir_intrinsic_shared_atomic_imax:
398 case nir_intrinsic_shared_atomic_umax:
399 case nir_intrinsic_shared_atomic_and:
400 case nir_intrinsic_shared_atomic_or:
401 case nir_intrinsic_shared_atomic_xor:
402 case nir_intrinsic_shared_atomic_exchange:
403 case nir_intrinsic_shared_atomic_comp_swap:
404 case nir_intrinsic_shared_atomic_fadd:
405 case nir_intrinsic_shared_atomic_fmin:
406 case nir_intrinsic_shared_atomic_fmax:
407 case nir_intrinsic_shared_atomic_fcomp_swap:
408 case nir_intrinsic_global_atomic_add:
409 case nir_intrinsic_global_atomic_imin:
410 case nir_intrinsic_global_atomic_umin:
411 case nir_intrinsic_global_atomic_imax:
412 case nir_intrinsic_global_atomic_umax:
413 case nir_intrinsic_global_atomic_and:
414 case nir_intrinsic_global_atomic_or:
415 case nir_intrinsic_global_atomic_xor:
416 case nir_intrinsic_global_atomic_exchange:
417 case nir_intrinsic_global_atomic_comp_swap:
418 case nir_intrinsic_global_atomic_fadd:
419 case nir_intrinsic_global_atomic_fmin:
420 case nir_intrinsic_global_atomic_fmax:
421 case nir_intrinsic_global_atomic_fcomp_swap:
422 case nir_intrinsic_atomic_counter_add:
423 case nir_intrinsic_atomic_counter_min:
424 case nir_intrinsic_atomic_counter_max:
425 case nir_intrinsic_atomic_counter_and:
426 case nir_intrinsic_atomic_counter_or:
427 case nir_intrinsic_atomic_counter_xor:
428 case nir_intrinsic_atomic_counter_inc:
429 case nir_intrinsic_atomic_counter_pre_dec:
430 case nir_intrinsic_atomic_counter_post_dec:
431 case nir_intrinsic_atomic_counter_exchange:
432 case nir_intrinsic_atomic_counter_comp_swap:
433 case nir_intrinsic_atomic_counter_add_deref:
434 case nir_intrinsic_atomic_counter_min_deref:
435 case nir_intrinsic_atomic_counter_max_deref:
436 case nir_intrinsic_atomic_counter_and_deref:
437 case nir_intrinsic_atomic_counter_or_deref:
438 case nir_intrinsic_atomic_counter_xor_deref:
439 case nir_intrinsic_atomic_counter_inc_deref:
440 case nir_intrinsic_atomic_counter_pre_dec_deref:
441 case nir_intrinsic_atomic_counter_post_dec_deref:
442 case nir_intrinsic_atomic_counter_exchange_deref:
443 case nir_intrinsic_atomic_counter_comp_swap_deref:
444 case nir_intrinsic_exclusive_scan:
445 case nir_intrinsic_ballot_bit_count_exclusive:
446 case nir_intrinsic_ballot_bit_count_inclusive:
447 case nir_intrinsic_write_invocation_amd:
448 case nir_intrinsic_mbcnt_amd:
449 case nir_intrinsic_elect:
450 is_divergent = true;
451 break;
452
453 default:
454 #ifdef NDEBUG
455 is_divergent = true;
456 break;
457 #else
458 nir_print_instr(&instr->instr, stderr);
459 unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
460 #endif
461 }
462
463 instr->dest.ssa.divergent = is_divergent;
464 return is_divergent;
465 }
466
467 static bool
visit_tex(nir_tex_instr * instr)468 visit_tex(nir_tex_instr *instr)
469 {
470 if (instr->dest.ssa.divergent)
471 return false;
472
473 bool is_divergent = false;
474
475 for (unsigned i = 0; i < instr->num_srcs; i++) {
476 switch (instr->src[i].src_type) {
477 case nir_tex_src_sampler_deref:
478 case nir_tex_src_sampler_handle:
479 case nir_tex_src_sampler_offset:
480 is_divergent |= instr->src[i].src.ssa->divergent &&
481 instr->sampler_non_uniform;
482 break;
483 case nir_tex_src_texture_deref:
484 case nir_tex_src_texture_handle:
485 case nir_tex_src_texture_offset:
486 is_divergent |= instr->src[i].src.ssa->divergent &&
487 instr->texture_non_uniform;
488 break;
489 default:
490 is_divergent |= instr->src[i].src.ssa->divergent;
491 break;
492 }
493 }
494
495 instr->dest.ssa.divergent = is_divergent;
496 return is_divergent;
497 }
498
499 static bool
visit_load_const(nir_load_const_instr * instr)500 visit_load_const(nir_load_const_instr *instr)
501 {
502 return false;
503 }
504
505 static bool
visit_ssa_undef(nir_ssa_undef_instr * instr)506 visit_ssa_undef(nir_ssa_undef_instr *instr)
507 {
508 return false;
509 }
510
511 static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)512 nir_variable_mode_is_uniform(nir_variable_mode mode) {
513 switch (mode) {
514 case nir_var_uniform:
515 case nir_var_mem_ubo:
516 case nir_var_mem_ssbo:
517 case nir_var_mem_shared:
518 case nir_var_mem_global:
519 return true;
520 default:
521 return false;
522 }
523 }
524
525 static bool
nir_variable_is_uniform(nir_shader * shader,nir_variable * var)526 nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
527 {
528 if (nir_variable_mode_is_uniform(var->data.mode))
529 return true;
530
531 nir_divergence_options options = shader->options->divergence_analysis_options;
532 gl_shader_stage stage = shader->info.stage;
533
534 if (stage == MESA_SHADER_FRAGMENT &&
535 (options & nir_divergence_single_prim_per_subgroup) &&
536 var->data.mode == nir_var_shader_in &&
537 var->data.interpolation == INTERP_MODE_FLAT)
538 return true;
539
540 if (stage == MESA_SHADER_TESS_CTRL &&
541 (options & nir_divergence_single_patch_per_tcs_subgroup) &&
542 var->data.mode == nir_var_shader_out && var->data.patch)
543 return true;
544
545 if (stage == MESA_SHADER_TESS_EVAL &&
546 (options & nir_divergence_single_patch_per_tes_subgroup) &&
547 var->data.mode == nir_var_shader_in && var->data.patch)
548 return true;
549
550 return false;
551 }
552
553 static bool
visit_deref(nir_shader * shader,nir_deref_instr * deref)554 visit_deref(nir_shader *shader, nir_deref_instr *deref)
555 {
556 if (deref->dest.ssa.divergent)
557 return false;
558
559 bool is_divergent = false;
560 switch (deref->deref_type) {
561 case nir_deref_type_var:
562 is_divergent = !nir_variable_is_uniform(shader, deref->var);
563 break;
564 case nir_deref_type_array:
565 case nir_deref_type_ptr_as_array:
566 is_divergent = deref->arr.index.ssa->divergent;
567 /* fallthrough */
568 case nir_deref_type_struct:
569 case nir_deref_type_array_wildcard:
570 is_divergent |= deref->parent.ssa->divergent;
571 break;
572 case nir_deref_type_cast:
573 is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
574 deref->parent.ssa->divergent;
575 break;
576 }
577
578 deref->dest.ssa.divergent = is_divergent;
579 return is_divergent;
580 }
581
582 static bool
visit_jump(nir_jump_instr * jump,struct divergence_state * state)583 visit_jump(nir_jump_instr *jump, struct divergence_state *state)
584 {
585 switch (jump->type) {
586 case nir_jump_continue:
587 if (state->divergent_loop_continue)
588 return false;
589 if (state->divergent_loop_cf)
590 state->divergent_loop_continue = true;
591 return state->divergent_loop_continue;
592 case nir_jump_break:
593 if (state->divergent_loop_break)
594 return false;
595 if (state->divergent_loop_cf)
596 state->divergent_loop_break = true;
597 return state->divergent_loop_break;
598 case nir_jump_return:
599 unreachable("NIR divergence analysis: Unsupported return instruction.");
600 break;
601 case nir_jump_goto:
602 case nir_jump_goto_if:
603 unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
604 break;
605 }
606 return false;
607 }
608
609 static bool
set_ssa_def_not_divergent(nir_ssa_def * def,UNUSED void * _state)610 set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
611 {
612 def->divergent = false;
613 return true;
614 }
615
616 static bool
update_instr_divergence(nir_shader * shader,nir_instr * instr)617 update_instr_divergence(nir_shader *shader, nir_instr *instr)
618 {
619 switch (instr->type) {
620 case nir_instr_type_alu:
621 return visit_alu(nir_instr_as_alu(instr));
622 case nir_instr_type_intrinsic:
623 return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
624 case nir_instr_type_tex:
625 return visit_tex(nir_instr_as_tex(instr));
626 case nir_instr_type_load_const:
627 return visit_load_const(nir_instr_as_load_const(instr));
628 case nir_instr_type_ssa_undef:
629 return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
630 case nir_instr_type_deref:
631 return visit_deref(shader, nir_instr_as_deref(instr));
632 case nir_instr_type_jump:
633 case nir_instr_type_phi:
634 case nir_instr_type_call:
635 case nir_instr_type_parallel_copy:
636 default:
637 unreachable("NIR divergence analysis: Unsupported instruction type.");
638 }
639 }
640
641 static bool
visit_block(nir_block * block,struct divergence_state * state)642 visit_block(nir_block *block, struct divergence_state *state)
643 {
644 bool has_changed = false;
645
646 nir_foreach_instr(instr, block) {
647 /* phis are handled when processing the branches */
648 if (instr->type == nir_instr_type_phi)
649 continue;
650
651 if (state->first_visit)
652 nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
653
654 if (instr->type == nir_instr_type_jump)
655 has_changed |= visit_jump(nir_instr_as_jump(instr), state);
656 else
657 has_changed |= update_instr_divergence(state->shader, instr);
658 }
659
660 return has_changed;
661 }
662
663 /* There are 3 types of phi instructions:
664 * (1) gamma: represent the joining point of different paths
665 * created by an “if-then-else” branch.
666 * The resulting value is divergent if the branch condition
667 * or any of the source values is divergent. */
668 static bool
visit_if_merge_phi(nir_phi_instr * phi,bool if_cond_divergent)669 visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
670 {
671 if (phi->dest.ssa.divergent)
672 return false;
673
674 unsigned defined_srcs = 0;
675 nir_foreach_phi_src(src, phi) {
676 /* if any source value is divergent, the resulting value is divergent */
677 if (src->src.ssa->divergent) {
678 phi->dest.ssa.divergent = true;
679 return true;
680 }
681 if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
682 defined_srcs++;
683 }
684 }
685
686 /* if the condition is divergent and two sources defined, the definition is divergent */
687 if (defined_srcs > 1 && if_cond_divergent) {
688 phi->dest.ssa.divergent = true;
689 return true;
690 }
691
692 return false;
693 }
694
695 /* There are 3 types of phi instructions:
696 * (2) mu: which only exist at loop headers,
697 * merge initial and loop-carried values.
698 * The resulting value is divergent if any source value
699 * is divergent or a divergent loop continue condition
700 * is associated with a different ssa-def. */
701 static bool
visit_loop_header_phi(nir_phi_instr * phi,nir_block * preheader,bool divergent_continue)702 visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
703 {
704 if (phi->dest.ssa.divergent)
705 return false;
706
707 nir_ssa_def* same = NULL;
708 nir_foreach_phi_src(src, phi) {
709 /* if any source value is divergent, the resulting value is divergent */
710 if (src->src.ssa->divergent) {
711 phi->dest.ssa.divergent = true;
712 return true;
713 }
714 /* if this loop is uniform, we're done here */
715 if (!divergent_continue)
716 continue;
717 /* skip the loop preheader */
718 if (src->pred == preheader)
719 continue;
720 /* skip undef values */
721 if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef)
722 continue;
723
724 /* check if all loop-carried values are from the same ssa-def */
725 if (!same)
726 same = src->src.ssa;
727 else if (same != src->src.ssa) {
728 phi->dest.ssa.divergent = true;
729 return true;
730 }
731 }
732
733 return false;
734 }
735
736 /* There are 3 types of phi instructions:
737 * (3) eta: represent values that leave a loop.
738 * The resulting value is divergent if the source value is divergent
739 * or any loop exit condition is divergent for a value which is
740 * not loop-invariant.
741 * (note: there should be no phi for loop-invariant variables.) */
742 static bool
visit_loop_exit_phi(nir_phi_instr * phi,bool divergent_break)743 visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
744 {
745 if (phi->dest.ssa.divergent)
746 return false;
747
748 if (divergent_break) {
749 phi->dest.ssa.divergent = true;
750 return true;
751 }
752
753 /* if any source value is divergent, the resulting value is divergent */
754 nir_foreach_phi_src(src, phi) {
755 if (src->src.ssa->divergent) {
756 phi->dest.ssa.divergent = true;
757 return true;
758 }
759 }
760
761 return false;
762 }
763
764 static bool
visit_if(nir_if * if_stmt,struct divergence_state * state)765 visit_if(nir_if *if_stmt, struct divergence_state *state)
766 {
767 bool progress = false;
768
769 struct divergence_state then_state = *state;
770 then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
771 progress |= visit_cf_list(&if_stmt->then_list, &then_state);
772
773 struct divergence_state else_state = *state;
774 else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
775 progress |= visit_cf_list(&if_stmt->else_list, &else_state);
776
777 /* handle phis after the IF */
778 nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
779 if (instr->type != nir_instr_type_phi)
780 break;
781
782 if (state->first_visit)
783 nir_instr_as_phi(instr)->dest.ssa.divergent = false;
784 progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
785 if_stmt->condition.ssa->divergent);
786 }
787
788 /* join loop divergence information from both branch legs */
789 state->divergent_loop_continue |= then_state.divergent_loop_continue ||
790 else_state.divergent_loop_continue;
791 state->divergent_loop_break |= then_state.divergent_loop_break ||
792 else_state.divergent_loop_break;
793
794 /* A divergent continue makes succeeding loop CF divergent:
795 * not all loop-active invocations participate in the remaining loop-body
796 * which means that a following break might be taken by some invocations, only */
797 state->divergent_loop_cf |= state->divergent_loop_continue;
798
799 return progress;
800 }
801
/* Analyze a loop to a fixed point.
 *
 * Loop-carried values are only known after the body has been visited, so
 * the body is re-visited until the loop-header phis stop changing. The
 * exit (eta) phis are resolved afterwards from the final break state.
 * Returns whether any divergence flag changed. */
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      /* Seed each header phi from its preheader (loop-entry) source only;
       * the back-edge sources haven't been analyzed yet. */
      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      /* Reset the CF flag for the next iteration; continue/break flags
       * are sticky and feed the header/exit phi handling. */
      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   return progress;
}
867
868 static bool
visit_cf_list(struct exec_list * list,struct divergence_state * state)869 visit_cf_list(struct exec_list *list, struct divergence_state *state)
870 {
871 bool has_changed = false;
872
873 foreach_list_typed(nir_cf_node, node, node, list) {
874 switch (node->type) {
875 case nir_cf_node_block:
876 has_changed |= visit_block(nir_cf_node_as_block(node), state);
877 break;
878 case nir_cf_node_if:
879 has_changed |= visit_if(nir_cf_node_as_if(node), state);
880 break;
881 case nir_cf_node_loop:
882 has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
883 break;
884 case nir_cf_node_function:
885 unreachable("NIR divergence analysis: Unsupported cf_node type.");
886 }
887 }
888
889 return has_changed;
890 }
891
/* Entry point: compute the `divergent` flag of every ssa-def in the
 * entrypoint of `shader`. Expects the shader to be in LCSSA-form. */
void
nir_divergence_analysis(nir_shader *shader)
{
   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}
906
nir_update_instr_divergence(nir_shader * shader,nir_instr * instr)907 bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
908 {
909 nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
910
911 if (instr->type == nir_instr_type_phi) {
912 nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
913 /* can only update gamma/if phis */
914 if (!prev || prev->type != nir_cf_node_if)
915 return false;
916
917 nir_if *nif = nir_cf_node_as_if(prev);
918
919 visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
920 return true;
921 }
922
923 update_instr_divergence(shader, instr);
924 return true;
925 }
926
927