1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_control_flow.h"
27 #include "nir_loop_analyze.h"
28 
29 
30 /* This limit is chosen fairly arbitrarily.  GLSL IR max iteration is 32
31  * instructions. (Multiply counting nodes and magic number 5.)  But there is
32  * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
33  * to give about the same results. Around 5 instructions per node.  But some
34  * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
35  * we set it to 26.
36  * This was bumped to 96 because it unrolled more loops with a positive
37  * effect (vulkan ssao demo).
38  */
39 #define LOOP_UNROLL_LIMIT 96
40 
41 /* Prepare this loop for unrolling by first converting to lcssa and then
42  * converting the phis from the top level of the loop body to regs.
43  * Partially converting out of SSA allows us to unroll the loop without having
44  * to keep track of and update phis along the way which gets tricky and
45  * doesn't add much value over converting to regs.
46  *
47  * The loop may have a continue instruction at the end of the loop which does
48  * nothing.  Once we're out of SSA, we can safely delete it so we don't have
49  * to deal with it later.
50  */
51 static void
loop_prepare_for_unroll(nir_loop * loop)52 loop_prepare_for_unroll(nir_loop *loop)
53 {
54    nir_convert_loop_to_lcssa(loop);
55 
56    /* Lower phis at the top level of the loop body */
57    foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
58       if (nir_cf_node_block == node->type) {
59          nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
60       }
61    }
62 
63    /* Lower phis after the loop */
64    nir_block *block_after_loop =
65       nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
66 
67    nir_lower_phis_to_regs_block(block_after_loop);
68 
69    /* Remove continue if its the last instruction in the loop */
70    nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
71    if (last_instr && last_instr->type == nir_instr_type_jump) {
72       assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);
73       nir_instr_remove(last_instr);
74    }
75 }
76 
77 static void
get_first_blocks_in_terminator(nir_loop_terminator * term,nir_block ** first_break_block,nir_block ** first_continue_block)78 get_first_blocks_in_terminator(nir_loop_terminator *term,
79                                nir_block **first_break_block,
80                                nir_block **first_continue_block)
81 {
82    if (term->continue_from_then) {
83       *first_continue_block = nir_if_first_then_block(term->nif);
84       *first_break_block = nir_if_first_else_block(term->nif);
85    } else {
86       *first_continue_block = nir_if_first_else_block(term->nif);
87       *first_break_block = nir_if_first_then_block(term->nif);
88    }
89 }
90 
91 /**
92  * Unroll a loop where we know exactly how many iterations there are and there
93  * is only a single exit point.  Note here we can unroll loops with multiple
94  * theoretical exits that only have a single terminating exit that we always
95  * know is the "real" exit.
96  *
97  *     loop {
98  *         ...instrs...
99  *     }
100  *
101  * And the iteration count is 3, the output will be:
102  *
103  *     ...instrs... ...instrs... ...instrs...
104  */
105 static void
simple_unroll(nir_loop * loop)106 simple_unroll(nir_loop *loop)
107 {
108    nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
109    assert(nir_is_trivial_loop_if(limiting_term->nif,
110                                  limiting_term->break_block));
111 
112    loop_prepare_for_unroll(loop);
113 
114    /* Skip over loop terminator and get the loop body. */
115    list_for_each_entry(nir_loop_terminator, terminator,
116                        &loop->info->loop_terminator_list,
117                        loop_terminator_link) {
118 
119       /* Remove all but the limiting terminator as we know the other exit
120        * conditions can never be met. Note we need to extract any instructions
121        * in the continue from branch and insert then into the loop body before
122        * removing it.
123        */
124       if (terminator->nif != limiting_term->nif) {
125          nir_block *first_break_block;
126          nir_block *first_continue_block;
127          get_first_blocks_in_terminator(terminator, &first_break_block,
128                                         &first_continue_block);
129 
130          assert(nir_is_trivial_loop_if(terminator->nif,
131                                        terminator->break_block));
132 
133          nir_cf_list continue_from_lst;
134          nir_cf_extract(&continue_from_lst,
135                         nir_before_block(first_continue_block),
136                         nir_after_block(terminator->continue_from_block));
137          nir_cf_reinsert(&continue_from_lst,
138                          nir_after_cf_node(&terminator->nif->cf_node));
139 
140          nir_cf_node_remove(&terminator->nif->cf_node);
141       }
142    }
143 
144    nir_block *first_break_block;
145    nir_block *first_continue_block;
146    get_first_blocks_in_terminator(limiting_term, &first_break_block,
147                                   &first_continue_block);
148 
149    /* Pluck out the loop header */
150    nir_block *header_blk = nir_loop_first_block(loop);
151    nir_cf_list lp_header;
152    nir_cf_extract(&lp_header, nir_before_block(header_blk),
153                   nir_before_cf_node(&limiting_term->nif->cf_node));
154 
155    /* Add the continue from block of the limiting terminator to the loop body
156     */
157    nir_cf_list continue_from_lst;
158    nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
159                   nir_after_block(limiting_term->continue_from_block));
160    nir_cf_reinsert(&continue_from_lst,
161                    nir_after_cf_node(&limiting_term->nif->cf_node));
162 
163    /* Pluck out the loop body */
164    nir_cf_list loop_body;
165    nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
166                   nir_after_block(nir_loop_last_block(loop)));
167 
168    struct hash_table *remap_table =
169       _mesa_hash_table_create(NULL, _mesa_hash_pointer,
170                               _mesa_key_pointer_equal);
171 
172    /* Clone the loop header */
173    nir_cf_list cloned_header;
174    nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
175                      remap_table);
176 
177    /* Insert cloned loop header before the loop */
178    nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
179 
180    /* Temp list to store the cloned loop body as we unroll */
181    nir_cf_list unrolled_lp_body;
182 
183    /* Clone loop header and append to the loop body */
184    for (unsigned i = 0; i < loop->info->trip_count; i++) {
185       /* Clone loop body */
186       nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
187                         remap_table);
188 
189       /* Insert unrolled loop body before the loop */
190       nir_cf_reinsert(&unrolled_lp_body, nir_before_cf_node(&loop->cf_node));
191 
192       /* Clone loop header */
193       nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
194                         remap_table);
195 
196       /* Insert loop header after loop body */
197       nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
198    }
199 
200    /* Remove the break from the loop terminator and add instructions from
201     * the break block after the unrolled loop.
202     */
203    nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
204    nir_instr_remove(break_instr);
205    nir_cf_list break_list;
206    nir_cf_extract(&break_list, nir_before_block(first_break_block),
207                   nir_after_block(limiting_term->break_block));
208 
209    /* Clone so things get properly remapped */
210    nir_cf_list cloned_break_list;
211    nir_cf_list_clone(&cloned_break_list, &break_list, loop->cf_node.parent,
212                      remap_table);
213 
214    nir_cf_reinsert(&cloned_break_list, nir_before_cf_node(&loop->cf_node));
215 
216    /* Remove the loop */
217    nir_cf_node_remove(&loop->cf_node);
218 
219    /* Delete the original loop body, break block & header */
220    nir_cf_delete(&lp_header);
221    nir_cf_delete(&loop_body);
222    nir_cf_delete(&break_list);
223 
224    _mesa_hash_table_destroy(remap_table, NULL);
225 }
226 
227 static void
move_cf_list_into_loop_term(nir_cf_list * lst,nir_loop_terminator * term)228 move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
229 {
230    /* Move the rest of the loop inside the continue-from-block */
231    nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
232 
233    /* Remove the break */
234    nir_instr_remove(nir_block_last_instr(term->break_block));
235 }
236 
237 static nir_cursor
get_complex_unroll_insert_location(nir_cf_node * node,bool continue_from_then)238 get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
239 {
240    if (node->type == nir_cf_node_loop) {
241       return nir_before_cf_node(node);
242    } else {
243       nir_if *if_stmt = nir_cf_node_as_if(node);
244       if (continue_from_then) {
245          return nir_after_block(nir_if_last_then_block(if_stmt));
246       } else {
247          return nir_after_block(nir_if_last_else_block(if_stmt));
248       }
249    }
250 }
251 
252 /**
253  * Unroll a loop with two exists when the trip count of one of the exits is
254  * unknown.  If continue_from_then is true, the loop is repeated only when the
255  * "then" branch of the if is taken; otherwise it is repeated only
256  * when the "else" branch of the if is taken.
257  *
258  * For example, if the input is:
259  *
260  *      loop {
261  *         ...phis/condition...
262  *         if condition {
263  *            ...then instructions...
264  *         } else {
265  *            ...continue instructions...
266  *            break
267  *         }
268  *         ...body...
269  *      }
270  *
271  * And the iteration count is 3, and unlimit_term->continue_from_then is true,
272  * then the output will be:
273  *
274  *      ...condition...
275  *      if condition {
276  *         ...then instructions...
277  *         ...body...
278  *         if condition {
279  *            ...then instructions...
280  *            ...body...
281  *            if condition {
282  *               ...then instructions...
283  *               ...body...
284  *            } else {
285  *               ...continue instructions...
286  *            }
287  *         } else {
288  *            ...continue instructions...
289  *         }
290  *      } else {
291  *         ...continue instructions...
292  *      }
293  */
294 static void
complex_unroll(nir_loop * loop,nir_loop_terminator * unlimit_term,bool limiting_term_second)295 complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
296                bool limiting_term_second)
297 {
298    assert(nir_is_trivial_loop_if(unlimit_term->nif,
299                                  unlimit_term->break_block));
300 
301    nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
302    assert(nir_is_trivial_loop_if(limiting_term->nif,
303                                  limiting_term->break_block));
304 
305    loop_prepare_for_unroll(loop);
306 
307    nir_block *header_blk = nir_loop_first_block(loop);
308 
309    nir_cf_list lp_header;
310    nir_cf_list limit_break_list;
311    unsigned num_times_to_clone;
312    if (limiting_term_second) {
313       /* Pluck out the loop header */
314       nir_cf_extract(&lp_header, nir_before_block(header_blk),
315                      nir_before_cf_node(&unlimit_term->nif->cf_node));
316 
317       /* We need some special handling when its the second terminator causing
318        * us to exit the loop for example:
319        *
320        *   for (int i = 0; i < uniform_lp_count; i++) {
321        *      colour = vec4(0.0, 1.0, 0.0, 1.0);
322        *
323        *      if (i == 1) {
324        *         break;
325        *      }
326        *      ... any further code is unreachable after i == 1 ...
327        *   }
328        */
329       nir_cf_list after_lt;
330       nir_if *limit_if = limiting_term->nif;
331       nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
332                      nir_after_block(nir_loop_last_block(loop)));
333       move_cf_list_into_loop_term(&after_lt, limiting_term);
334 
335       /* Because the trip count is the number of times we pass over the entire
336        * loop before hitting a break when the second terminator is the
337        * limiting terminator we can actually execute code inside the loop when
338        * trip count == 0 e.g. the code above the break.  So we need to bump
339        * the trip_count in order for the code below to clone anything.  When
340        * trip count == 1 we execute the code above the break twice and the
341        * code below it once so we need clone things twice and so on.
342        */
343       num_times_to_clone = loop->info->trip_count + 1;
344    } else {
345       /* Pluck out the loop header */
346       nir_cf_extract(&lp_header, nir_before_block(header_blk),
347                      nir_before_cf_node(&limiting_term->nif->cf_node));
348 
349       nir_block *first_break_block;
350       nir_block *first_continue_block;
351       get_first_blocks_in_terminator(limiting_term, &first_break_block,
352                                      &first_continue_block);
353 
354       /* Remove the break then extract instructions from the break block so we
355        * can insert them in the innermost else of the unrolled loop.
356        */
357       nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
358       nir_instr_remove(break_instr);
359       nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
360                      nir_after_block(limiting_term->break_block));
361 
362       nir_cf_list continue_list;
363       nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
364                      nir_after_block(limiting_term->continue_from_block));
365 
366       nir_cf_reinsert(&continue_list,
367                       nir_after_cf_node(&limiting_term->nif->cf_node));
368 
369       nir_cf_node_remove(&limiting_term->nif->cf_node);
370 
371       num_times_to_clone = loop->info->trip_count;
372    }
373 
374    /* In the terminator that we have no trip count for move everything after
375     * the terminator into the continue from branch.
376     */
377    nir_cf_list loop_end;
378    nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
379                   nir_after_block(nir_loop_last_block(loop)));
380    move_cf_list_into_loop_term(&loop_end, unlimit_term);
381 
382    /* Pluck out the loop body. */
383    nir_cf_list loop_body;
384    nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
385                   nir_after_block(nir_loop_last_block(loop)));
386 
387    struct hash_table *remap_table =
388       _mesa_hash_table_create(NULL, _mesa_hash_pointer,
389                               _mesa_key_pointer_equal);
390 
391    /* Set unroll_loc to the loop as we will insert the unrolled loop before it
392     */
393    nir_cf_node *unroll_loc = &loop->cf_node;
394 
395    /* Temp lists to store the cloned loop as we unroll */
396    nir_cf_list unrolled_lp_body;
397    nir_cf_list cloned_header;
398 
399    for (unsigned i = 0; i < num_times_to_clone; i++) {
400       /* Clone loop header */
401       nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
402                         remap_table);
403 
404       nir_cursor cursor =
405          get_complex_unroll_insert_location(unroll_loc,
406                                             unlimit_term->continue_from_then);
407 
408       /* Insert cloned loop header */
409       nir_cf_reinsert(&cloned_header, cursor);
410 
411       cursor =
412          get_complex_unroll_insert_location(unroll_loc,
413                                             unlimit_term->continue_from_then);
414 
415       /* Clone loop body */
416       nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
417                         remap_table);
418 
419       unroll_loc = exec_node_data(nir_cf_node,
420                                   exec_list_get_tail(&unrolled_lp_body.list),
421                                   node);
422       assert(unroll_loc->type == nir_cf_node_block &&
423              exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
424 
425       /* Get the unrolled if node */
426       unroll_loc = nir_cf_node_prev(unroll_loc);
427 
428       /* Insert unrolled loop body */
429       nir_cf_reinsert(&unrolled_lp_body, cursor);
430    }
431 
432    if (!limiting_term_second) {
433       assert(unroll_loc->type == nir_cf_node_if);
434 
435       nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
436                         remap_table);
437 
438       nir_cursor cursor =
439          get_complex_unroll_insert_location(unroll_loc,
440                                             unlimit_term->continue_from_then);
441 
442       /* Insert cloned loop header */
443       nir_cf_reinsert(&cloned_header, cursor);
444 
445       /* Clone so things get properly remapped, and insert break block from
446        * the limiting terminator.
447        */
448       nir_cf_list cloned_break_blk;
449       nir_cf_list_clone(&cloned_break_blk, &limit_break_list,
450                         loop->cf_node.parent, remap_table);
451 
452       cursor =
453          get_complex_unroll_insert_location(unroll_loc,
454                                             unlimit_term->continue_from_then);
455 
456       nir_cf_reinsert(&cloned_break_blk, cursor);
457       nir_cf_delete(&limit_break_list);
458    }
459 
460    /* The loop has been unrolled so remove it. */
461    nir_cf_node_remove(&loop->cf_node);
462 
463    /* Delete the original loop header and body */
464    nir_cf_delete(&lp_header);
465    nir_cf_delete(&loop_body);
466 
467    _mesa_hash_table_destroy(remap_table, NULL);
468 }
469 
470 static bool
is_loop_small_enough_to_unroll(nir_shader * shader,nir_loop_info * li)471 is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
472 {
473    unsigned max_iter = shader->options->max_unroll_iterations;
474 
475    if (li->trip_count > max_iter)
476       return false;
477 
478    if (li->force_unroll)
479       return true;
480 
481    bool loop_not_too_large =
482       li->num_instructions * li->trip_count <= max_iter * LOOP_UNROLL_LIMIT;
483 
484    return loop_not_too_large;
485 }
486 
487 static bool
process_loops(nir_shader * sh,nir_cf_node * cf_node,bool * innermost_loop)488 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop)
489 {
490    bool progress = false;
491    nir_loop *loop;
492 
493    switch (cf_node->type) {
494    case nir_cf_node_block:
495       return progress;
496    case nir_cf_node_if: {
497       nir_if *if_stmt = nir_cf_node_as_if(cf_node);
498       foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list)
499          progress |= process_loops(sh, nested_node, innermost_loop);
500       foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list)
501          progress |= process_loops(sh, nested_node, innermost_loop);
502       return progress;
503    }
504    case nir_cf_node_loop: {
505       loop = nir_cf_node_as_loop(cf_node);
506       foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body)
507          progress |= process_loops(sh, nested_node, innermost_loop);
508       break;
509    }
510    default:
511       unreachable("unknown cf node type");
512    }
513 
514    if (*innermost_loop) {
515       /* Don't attempt to unroll outer loops or a second inner loop in
516        * this pass wait until the next pass as we have altered the cf.
517        */
518       *innermost_loop = false;
519 
520       if (loop->info->limiting_terminator == NULL)
521          return progress;
522 
523       if (!is_loop_small_enough_to_unroll(sh, loop->info))
524          return progress;
525 
526       if (loop->info->is_trip_count_known) {
527          simple_unroll(loop);
528          progress = true;
529       } else {
530          /* Attempt to unroll loops with two terminators. */
531          unsigned num_lt = list_length(&loop->info->loop_terminator_list);
532          if (num_lt == 2) {
533             bool limiting_term_second = true;
534             nir_loop_terminator *terminator =
535                list_last_entry(&loop->info->loop_terminator_list,
536                                 nir_loop_terminator, loop_terminator_link);
537 
538 
539             if (terminator->nif == loop->info->limiting_terminator->nif) {
540                limiting_term_second = false;
541                terminator =
542                   list_first_entry(&loop->info->loop_terminator_list,
543                                   nir_loop_terminator, loop_terminator_link);
544             }
545 
546             /* If the first terminator has a trip count of zero and is the
547              * limiting terminator just do a simple unroll as the second
548              * terminator can never be reached.
549              */
550             if (loop->info->trip_count == 0 && !limiting_term_second) {
551                simple_unroll(loop);
552             } else {
553                complex_unroll(loop, terminator, limiting_term_second);
554             }
555             progress = true;
556          }
557       }
558    }
559 
560    return progress;
561 }
562 
563 static bool
nir_opt_loop_unroll_impl(nir_function_impl * impl,nir_variable_mode indirect_mask)564 nir_opt_loop_unroll_impl(nir_function_impl *impl,
565                          nir_variable_mode indirect_mask)
566 {
567    bool progress = false;
568    nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask);
569    nir_metadata_require(impl, nir_metadata_block_index);
570 
571    foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
572       bool innermost_loop = true;
573       progress |= process_loops(impl->function->shader, node,
574                                 &innermost_loop);
575    }
576 
577    if (progress)
578       nir_lower_regs_to_ssa_impl(impl);
579 
580    return progress;
581 }
582 
583 bool
nir_opt_loop_unroll(nir_shader * shader,nir_variable_mode indirect_mask)584 nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
585 {
586    bool progress = false;
587 
588    nir_foreach_function(function, shader) {
589       if (function->impl) {
590          progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
591       }
592    }
593    return progress;
594 }
595