1 /*
2  * Copyright (c) 2017 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <string.h>
26 
27 #include "util/hash_table.h"
28 #include "util/ralloc.h"
29 #include "util/bitscan.h"
30 #include "compiler/nir/nir.h"
31 #include "pipe/p_state.h"
32 
33 
34 #include "ppir.h"
35 
/* Create a ppir node whose destination is an SSA value.
 *
 * The node is created with the SSA def's index and an empty mask; its
 * destination is then set up as an SSA target that writes the def's first
 * num_components channels.
 *
 * Returns the new node, or NULL when ppir_node_create() fails. */
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *created = ppir_node_create(block, op, ssa->index, 0);
   if (created == NULL)
      return NULL;

   const bool is_load_or_store = created->type == ppir_node_type_load ||
                                 created->type == ppir_node_type_store;

   ppir_dest *out = ppir_node_get_dest(created);
   out->type = ppir_target_ssa;
   out->write_mask = u_bit_consecutive(0, ssa->num_components);
   out->ssa.num_components = ssa->num_components;

   /* SSA destinations of load and store nodes are flagged as head */
   if (is_load_or_store)
      out->ssa.is_head = true;

   return created;
}
53 
/* Create a ppir node whose destination is a (non-SSA) register.
 *
 * The destination is bound to the entry of the compiler's reg_list that has
 * the same index as the NIR register, and writes the channels in mask.
 *
 * NOTE(review): when no reg_list entry matches, the destination's reg pointer
 * is left untouched and is then dereferenced for load/store nodes -- confirm
 * that callers always register every nir_register beforehand.
 *
 * Returns the new node, or NULL when ppir_node_create() fails. */
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *created = ppir_node_create(block, op, reg->index, mask);
   if (created == NULL)
      return NULL;

   ppir_dest *out = ppir_node_get_dest(created);
   out->type = ppir_target_register;
   out->write_mask = mask;

   /* Look up the ppir register that mirrors the NIR one */
   list_for_each_entry(ppir_reg, candidate, &block->comp->reg_list, list) {
      if (candidate->index != reg->index)
         continue;

      out->reg = candidate;
      break;
   }

   /* Registers written by load and store nodes are flagged as head */
   switch (created->type) {
   case ppir_node_type_load:
   case ppir_node_type_store:
      out->reg->is_head = true;
      break;
   default:
      break;
   }

   return created;
}
79 
/* Create a ppir node for an optional NIR destination.
 *
 * With a NULL dest the node is created with index -1 and an empty mask.
 * Otherwise creation is delegated to the SSA or register variant depending on
 * dest->is_ssa; mask is only forwarded for register destinations.
 *
 * Returns the new node, or NULL on failure. */
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   if (!dest)
      return ppir_node_create(block, op, -1, 0);

   return dest->is_ssa
      ? ppir_node_create_ssa(block, op, &dest->ssa)
      : ppir_node_create_reg(block, op, dest->reg.reg, mask);
}
94 
/* Hook up one NIR source of a ppir node.
 *
 * Finds the node(s) producing the source in comp->var_nodes, records source
 * dependencies from node to them, and finally assigns the ppir_src target
 * from the last producer found.
 *
 * For register sources, mask selects which entries of ps->swizzle are
 * visited; each selected channel has its own var_nodes slot. For SSA sources
 * mask is not used. */
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *producer = NULL;

   if (!ns->is_ssa) {
      nir_register *reg = ns->reg.reg;

      while (mask) {
         int chan = ps->swizzle[u_bit_scan(&mask)];
         ppir_node **slot =
            &comp->var_nodes[(reg->index << 2) + comp->reg_base + chan];

         producer = *slot;
         if (!producer) {
            /* The register is read before any write to it was seen:
             * stand in a dummy node as its producer */
            producer = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                            u_bit_consecutive(0, 4));
            *slot = producer;
         }

         /* No deps on dummies, and no self-deps for ops like
          * r1 = r1 + ssa1 */
         if (!producer || producer == node || producer->op == ppir_op_dummy)
            continue;

         ppir_node_add_dep(node, producer, ppir_dep_src);
      }
   }
   else {
      producer = comp->var_nodes[ns->ssa->index];
      /* Undef producers are not tracked as dependencies */
      if (producer->op != ppir_op_undef)
         ppir_node_add_dep(node, producer, ppir_dep_src);
   }

   ppir_node_target_assign(ps, producer);
}
124 
125 static int nir_to_ppir_opcodes[nir_num_opcodes] = {
126    /* not supported */
127    [0 ... nir_last_opcode] = -1,
128 
129    [nir_op_mov] = ppir_op_mov,
130    [nir_op_fmul] = ppir_op_mul,
131    [nir_op_fabs] = ppir_op_abs,
132    [nir_op_fneg] = ppir_op_neg,
133    [nir_op_fadd] = ppir_op_add,
134    [nir_op_fsum3] = ppir_op_sum3,
135    [nir_op_fsum4] = ppir_op_sum4,
136    [nir_op_frsq] = ppir_op_rsqrt,
137    [nir_op_flog2] = ppir_op_log2,
138    [nir_op_fexp2] = ppir_op_exp2,
139    [nir_op_fsqrt] = ppir_op_sqrt,
140    [nir_op_fsin] = ppir_op_sin,
141    [nir_op_fcos] = ppir_op_cos,
142    [nir_op_fmax] = ppir_op_max,
143    [nir_op_fmin] = ppir_op_min,
144    [nir_op_frcp] = ppir_op_rcp,
145    [nir_op_ffloor] = ppir_op_floor,
146    [nir_op_fceil] = ppir_op_ceil,
147    [nir_op_ffract] = ppir_op_fract,
148    [nir_op_sge] = ppir_op_ge,
149    [nir_op_slt] = ppir_op_lt,
150    [nir_op_seq] = ppir_op_eq,
151    [nir_op_sne] = ppir_op_ne,
152    [nir_op_fcsel] = ppir_op_select,
153    [nir_op_inot] = ppir_op_not,
154    [nir_op_ftrunc] = ppir_op_trunc,
155    [nir_op_fsat] = ppir_op_sat,
156    [nir_op_fddx] = ppir_op_ddx,
157    [nir_op_fddy] = ppir_op_ddy,
158 };
159 
/* Translate a NIR ALU instruction into a ppir ALU node appended to block.
 *
 * Returns false when the opcode has no ppir mapping or when the node cannot
 * be created. */
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *alu = nir_instr_as_alu(ni);

   const int op = nir_to_ppir_opcodes[alu->op];
   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[alu->op].name);
      return false;
   }

   nir_alu_dest *nir_dst = &alu->dest;
   ppir_alu_node *node = ppir_node_create_dest(block, op, &nir_dst->dest,
                                               nir_dst->write_mask);
   if (!node)
      return false;

   ppir_dest *dst = &node->dest;
   if (nir_dst->saturate)
      dst->modifier = ppir_outmod_clamp_fraction;

   /* sum3/sum4 read a fixed set of source channels (xyz / xyzw); every
    * other op reads the channels selected by the destination write mask */
   unsigned src_mask = dst->write_mask;
   if (op == ppir_op_sum3)
      src_mask = 0x7;
   else if (op == ppir_op_sum4)
      src_mask = 0xf;

   const unsigned num_src = nir_op_infos[alu->op].num_inputs;
   node->num_src = num_src;

   for (unsigned i = 0; i < num_src; i++) {
      nir_alu_src *nir_src_i = &alu->src[i];
      ppir_src *ppir_src_i = &node->src[i];

      /* The swizzle has to be in place first: ppir_node_add_src() reads it */
      memcpy(ppir_src_i->swizzle, nir_src_i->swizzle,
             sizeof(ppir_src_i->swizzle));
      ppir_node_add_src(block->comp, &node->node, ppir_src_i,
                        &nir_src_i->src, src_mask);

      ppir_src_i->absolute = nir_src_i->abs;
      ppir_src_i->negate = nir_src_i->negate;
   }

   list_addtail(&node->node.list, &block->node_list);
   return true;
}
209 
210 static ppir_block *ppir_block_create(ppir_compiler *comp);
211 
ppir_emit_discard_block(ppir_compiler * comp)212 static bool ppir_emit_discard_block(ppir_compiler *comp)
213 {
214    ppir_block *block = ppir_block_create(comp);
215    ppir_discard_node *discard;
216    if (!block)
217       return false;
218 
219    comp->discard_block = block;
220    block->comp  = comp;
221 
222    discard = ppir_node_create(block, ppir_op_discard, -1, 0);
223    if (discard)
224       list_addtail(&discard->node.list, &block->node_list);
225    else
226       return false;
227 
228    return true;
229 }
230 
ppir_emit_discard_if(ppir_block * block,nir_instr * ni)231 static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
232 {
233    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
234    ppir_node *node;
235    ppir_compiler *comp = block->comp;
236    ppir_branch_node *branch;
237 
238    if (!comp->discard_block && !ppir_emit_discard_block(comp))
239       return NULL;
240 
241    node = ppir_node_create(block, ppir_op_branch, -1, 0);
242    if (!node)
243       return NULL;
244    branch = ppir_node_to_branch(node);
245 
246    /* second src and condition will be updated during lowering */
247    ppir_node_add_src(block->comp, node, &branch->src[0],
248                      &instr->src[0], u_bit_consecutive(0, instr->num_components));
249    branch->num_src = 1;
250    branch->target = comp->discard_block;
251 
252    return node;
253 }
254 
ppir_emit_discard(ppir_block * block,nir_instr * ni)255 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
256 {
257    ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
258 
259    return node;
260 }
261 
/* Translate a NIR intrinsic into ppir node(s) appended to block.
 *
 * Handled intrinsics: load_input, load_frag_coord, load_point_coord,
 * load_front_face, load_uniform, store_output, discard and discard_if.
 *
 * Returns false for any other intrinsic and whenever a node cannot be
 * created. */
static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      /* A write mask is only needed for register destinations */
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      /* The index counts components: four per base slot */
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         /* Constant offset: fold it into the index (read as a float) */
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         /* Indirect offset: keep it as the load's only source */
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         unreachable("bad intrinsic");
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         /* Constant offset: fold it into the index (read as a float) */
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         /* Indirect offset: keep it as the load's only source */
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output is ssa, that register
       * can be directly marked as the output.
       * If discard is used or the source is not ssa, things can get a
       * lot more complicated, so don't try to optimize those and fall
       * back to inserting a mov at the end.
       * If the source node will only be able to output to pipeline
       * registers, fall back to the mov as well. */
      if (!block->comp->uses_discard && instr->src->is_ssa) {
         node = block->comp->var_nodes[instr->src->ssa->index];
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
         case ppir_op_const:
            break;
         default:
            node->is_end = 1;
            return true;
         }
      }

      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      /* The mov was created without a destination; set up an SSA one by hand */
      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      /* Identity swizzle over the stored components */
      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      alu_node->node.is_end = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_discard:
      node = ppir_emit_discard(block, ni);
      /* Creation can fail; never link a NULL node into the list */
      if (!node)
         return false;
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_discard_if:
      node = ppir_emit_discard_if(block, ni);
      /* Creation can fail; never link a NULL node into the list */
      if (!node)
         return false;
      list_addtail(&node->list, &block->node_list);
      return true;

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}
401 
/* Translate a NIR load_const into a ppir const node appended to block.
 *
 * Every component's 32-bit payload is copied through its integer view.
 * Returns false when the node cannot be created. */
static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *load_const = nir_instr_as_load_const(ni);

   ppir_const_node *const_node =
      ppir_node_create_ssa(block, ppir_op_const, &load_const->def);
   if (!const_node)
      return false;

   assert(load_const->def.bit_size == 32);

   const unsigned count = load_const->def.num_components;
   const_node->constant.num = count;
   for (unsigned c = 0; c < count; c++)
      const_node->constant.value[c].i = load_const->value[c].i32;

   list_addtail(&const_node->node.list, &block->node_list);
   return true;
}
418 
/* Translate a NIR ssa_undef into a ppir undef node appended to block.
 *
 * The node's SSA destination is flagged as undefined.
 * Returns false when the node cannot be created. */
static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *nir_undef = nir_instr_as_ssa_undef(ni);

   ppir_node *undef_node =
      ppir_node_create_ssa(block, ppir_op_undef, &nir_undef->def);
   if (!undef_node)
      return false;

   ppir_node_to_alu(undef_node)->dest.ssa.undef = true;

   list_addtail(&undef_node->list, &block->node_list);
   return true;
}
433 
/* Translate a NIR texture instruction into a ppir load_texture node.
 *
 * Only the tex, txb and txl ops on 2D, cube, rect and external samplers are
 * accepted. The coordinates are wired up as src[0] and an optional bias/lod
 * as src[1]. The texture node's coordinates must come through a load_coords
 * node: either the existing producer is reused, or a new load_coords_reg
 * node is created and placed between the texture node and its predecessors.
 *
 * Returns false for unsupported ops, sampler dims or source types, and when
 * a node cannot be created. */
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   /* A write mask is only needed for register destinations */
   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   /* Identity swizzle over the coordinate components */
   for (int i = 0; i < instr->coord_components; i++)
         node->src[0].swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         if (ns->is_ssa) {
            ppir_node *child = block->comp->var_nodes[ns->ssa->index];
            if (child->op == ppir_op_load_varying) {
               /* If the successor is load_texture, promote it to load_coords */
               /* NOTE(review): this cast treats the nir_src pointer as a
                * pointer to its enclosing nir_tex_src, which presumably
                * relies on src being the first member -- confirm against
                * nir.h. The src_type test looks redundant inside this case. */
               nir_tex_src *nts = (nir_tex_src *)ns;
               if (nts->src_type == nir_tex_src_coord)
                  child->op = ppir_op_load_coords;
            }
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         /* Bias and explicit lod share src[1]; explicit_lod tells them apart */
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   /* Reuse the coordinate producer only if it is a load_coords node that
    * passes the ppir_node_has_single_src_succ() check */
   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      /* The new node takes over the texture node's coordinate source */
      load->src = node->src[0];
      load->num_src = 1;
      /* Cube maps take three coordinate components, everything else two */
      if (node->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
         load->num_components = 3;
      else
         load->num_components = 2;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __FUNCTION__, load->index, node->node.index);

      /* Move every predecessor of the texture node over to the new load
       * node, then make the texture node depend on the load node alone */
      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   /* Both ends of the coordinate hand-off use the pipeline target with
    * ppir_pipeline_reg_discard */
   assert(load);
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}
551 
ppir_get_block(ppir_compiler * comp,nir_block * nblock)552 static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
553 {
554    ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);
555 
556    return block;
557 }
558 
/* Translate a NIR break/continue into an unconditional ppir branch appended
 * to block.
 *
 * A break targets the single successor of the compiler's current block; a
 * continue targets comp->loop_cont_block.
 *
 * Returns false for any other jump type and when the branch node cannot be
 * created. */
static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_compiler *comp = block->comp;
   nir_jump_instr *jump = nir_instr_as_jump(ni);
   ppir_block *target = NULL;

   if (jump->type == nir_jump_break) {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      target = comp->current_block->successors[0];
   }
   else if (jump->type == nir_jump_continue) {
      target = comp->loop_cont_block;
   }
   else {
      ppir_error("nir_jump_instr not support\n");
      return false;
   }

   assert(target != NULL);

   ppir_node *branch_node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!branch_node)
      return false;

   /* No sources: the branch is unconditional */
   ppir_branch_node *br = ppir_node_to_branch(branch_node);
   br->num_src = 0;
   br->target = target;

   list_addtail(&branch_node->list, &block->node_list);
   return true;
}
596 
597 static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
598    [nir_instr_type_alu]        = ppir_emit_alu,
599    [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
600    [nir_instr_type_load_const] = ppir_emit_load_const,
601    [nir_instr_type_ssa_undef]  = ppir_emit_ssa_undef,
602    [nir_instr_type_tex]        = ppir_emit_tex,
603    [nir_instr_type_jump]       = ppir_emit_jump,
604 };
605 
ppir_block_create(ppir_compiler * comp)606 static ppir_block *ppir_block_create(ppir_compiler *comp)
607 {
608    ppir_block *block = rzalloc(comp, ppir_block);
609    if (!block)
610       return NULL;
611 
612    list_inithead(&block->node_list);
613    list_inithead(&block->instr_list);
614 
615    block->comp = comp;
616 
617    return block;
618 }
619 
/* Emit every instruction of a NIR block into its ppir counterpart.
 *
 * The ppir block becomes comp->current_block and is appended to
 * comp->block_list before its instructions are translated.
 *
 * Returns false as soon as an instruction has no emit handler or its handler
 * fails. */
static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);

      /* The handler table only covers types below nir_instr_type_phi and
       * leaves a NULL entry for every type it does not list. Reject those
       * instead of indexing out of bounds or calling a NULL pointer. */
      if (instr->type >= nir_instr_type_phi ||
          !ppir_emit_instr[instr->type]) {
         ppir_error("unsupported nir_instr type %d\n", instr->type);
         return false;
      }

      if (!ppir_emit_instr[instr->type](block, instr))
         return false;
   }

   return true;
}
636 
637 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
638 
ppir_emit_if(ppir_compiler * comp,nir_if * if_stmt)639 static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
640 {
641    ppir_node *node;
642    ppir_branch_node *else_branch, *after_branch;
643    nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
644    bool empty_else_block =
645       (nir_else_block == nir_if_last_else_block(if_stmt) &&
646       exec_list_is_empty(&nir_else_block->instr_list));
647    ppir_block *block = comp->current_block;
648 
649    node = ppir_node_create(block, ppir_op_branch, -1, 0);
650    if (!node)
651       return false;
652    else_branch = ppir_node_to_branch(node);
653    ppir_node_add_src(block->comp, node, &else_branch->src[0],
654                      &if_stmt->condition, 1);
655    else_branch->num_src = 1;
656    /* Negate condition to minimize branching. We're generating following:
657     * current_block: { ...; if (!statement) branch else_block; }
658     * then_block: { ...; branch after_block; }
659     * else_block: { ... }
660     * after_block: { ... }
661     *
662     * or if else list is empty:
663     * block: { if (!statement) branch else_block; }
664     * then_block: { ... }
665     * else_block: after_block: { ... }
666     */
667    else_branch->negate = true;
668    list_addtail(&else_branch->node.list, &block->node_list);
669 
670    if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
671       return false;
672 
673    if (empty_else_block) {
674       nir_block *nblock = nir_if_last_else_block(if_stmt);
675       assert(nblock->successors[0]);
676       assert(!nblock->successors[1]);
677       else_branch->target = ppir_get_block(comp, nblock->successors[0]);
678       /* Add empty else block to the list */
679       list_addtail(&block->successors[1]->list, &comp->block_list);
680       return true;
681    }
682 
683    else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));
684 
685    nir_block *last_then_block = nir_if_last_then_block(if_stmt);
686    assert(last_then_block->successors[0]);
687    assert(!last_then_block->successors[1]);
688    block = ppir_get_block(comp, last_then_block);
689    node = ppir_node_create(block, ppir_op_branch, -1, 0);
690    if (!node)
691       return false;
692    after_branch = ppir_node_to_branch(node);
693    /* Unconditional */
694    after_branch->num_src = 0;
695    after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
696    /* Target should be after_block, will fixup later */
697    list_addtail(&after_branch->node.list, &block->node_list);
698 
699    if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
700       return false;
701 
702    return true;
703 }
704 
/* Translate a NIR loop: emit its body and close it with an unconditional
 * branch from the last body block back to the first one.
 *
 * While the body is emitted, comp->loop_cont_block points at the loop's
 * first block; the previous value is restored on success. Also bumps
 * comp->num_loops.
 *
 * Returns false when the body fails to emit or the branch cannot be
 * created. */
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *outer_cont_block = comp->loop_cont_block;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   if (!ppir_emit_cf_list(comp, &nloop->body))
      return false;

   ppir_block *tail = ppir_get_block(comp, nir_loop_last_block(nloop));
   ppir_node *node = ppir_node_create(tail, ppir_op_branch, -1, 0);
   if (!node)
      return false;

   /* No sources: the back edge is unconditional */
   ppir_branch_node *back_edge = ppir_node_to_branch(node);
   back_edge->num_src = 0;
   back_edge->target = comp->loop_cont_block;
   list_addtail(&back_edge->node.list, &tail->node_list);

   comp->loop_cont_block = outer_cont_block;
   comp->num_loops++;

   return true;
}
735 
/* Function control-flow nodes are not supported: report it and fail. */
static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   (void)comp;
   (void)nfunc;

   ppir_error("function nir_cf_node not support\n");
   return false;
}
741 
/* Emit every control-flow node of a NIR cf list, in order.
 *
 * Dispatches on the node type to the block/if/loop/function emitters and
 * stops at the first failure. Returns true when every node was emitted. */
static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, cf, node, list) {
      bool emitted = false;

      if (cf->type == nir_cf_node_block) {
         emitted = ppir_emit_block(comp, nir_cf_node_as_block(cf));
      }
      else if (cf->type == nir_cf_node_if) {
         emitted = ppir_emit_if(comp, nir_cf_node_as_if(cf));
      }
      else if (cf->type == nir_cf_node_loop) {
         emitted = ppir_emit_loop(comp, nir_cf_node_as_loop(cf));
      }
      else if (cf->type == nir_cf_node_function) {
         emitted = ppir_emit_function(comp, nir_cf_node_as_function(cf));
      }
      else {
         ppir_error("unknown NIR node type %d\n", cf->type);
         return false;
      }

      if (!emitted)
         return false;
   }

   return true;
}
771 
ppir_compiler_create(void * prog,unsigned num_reg,unsigned num_ssa)772 static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
773 {
774    ppir_compiler *comp = rzalloc_size(
775       prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
776    if (!comp)
777       return NULL;
778 
779    list_inithead(&comp->block_list);
780    list_inithead(&comp->reg_list);
781    comp->blocks = _mesa_hash_table_u64_create(prog);
782 
783    comp->var_nodes = (ppir_node **)(comp + 1);
784    comp->reg_base = num_ssa;
785    comp->prog = prog;
786    return comp;
787 }
788 
/* Add sequence dependencies that pin order-sensitive nodes in place.
 *
 * Some intrinsics have no explicit dependencies and rely purely on
 * instruction order. Take discard_if and the is_end node: without a fake
 * dependency between them the scheduler may place is_end first, and since
 * is_end terminates the shader on Utgard PP, the rest would never run.
 * So fake dependencies are added for discard/branch/store to preserve the
 * instruction order.
 *
 * Each block is walked backwards. The most recently seen order-sensitive
 * node (is_end, discard, store_temp or branch) gets a sequence dependency on
 * every earlier root node that is not a constant.
 *
 * TODO: scheduler should schedule discard_if as early as possible otherwise
 * we may end up with suboptimal code for cases like this:
 *
 * s3 = s1 < s2
 * discard_if s3
 * s4 = s1 + s2
 * store s4
 *
 * Here store depends on discard_if and s4, but since dependencies can be
 * scheduled in any order the result may look like this:
 *
 * instr1: s3 = s1 < s2
 * instr2: s4 = s1 + s2
 * instr3: discard_if s3
 * instr4: store s4
 */
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *anchor = NULL;

      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (anchor && ppir_node_is_root(node) && node->op != ppir_op_const)
            ppir_node_add_dep(anchor, node, ppir_dep_sequence);

         /* Order-sensitive nodes become the anchor for everything that
          * precedes them in the block */
         switch (node->op) {
         case ppir_op_discard:
         case ppir_op_store_temp:
         case ppir_op_branch:
            anchor = node;
            break;
         default:
            if (node->is_end)
               anchor = node;
            break;
         }
      }
   }
}
830 
/* Report shader statistics (instruction count, loops, spills and fills).
 *
 * The report is always sent through the pipe debug callback, and also
 * printed to stderr when LIMA_DEBUG_SHADERDB is set in lima_debug.
 * When the report string cannot be built, an error is logged and nothing
 * is reported. */
static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct pipe_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb = NULL;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->cur_instr_index,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);
   if (ret < 0) {
      /* asprintf() leaves the output pointer undefined on failure, so it
       * must neither be printed nor freed */
      ppir_error("failed to format shader-db report\n");
      return;
   }

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}
851 
/* Add write-after-read dependencies for register accesses.
 *
 * For every block and every register, the block's nodes are walked
 * backwards while tracking the closest later node that writes the register.
 * Each node that reads the register makes that later write depend on it. */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *later_write = NULL;

         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int s = 0; s < ppir_node_get_src_num(node); s++) {
               ppir_src *src = ppir_node_get_src(node, s);

               /* Only reads of this register that have a later write matter */
               if (!later_write || !src)
                  continue;
               if (src->type != ppir_target_register || src->reg != reg)
                  continue;

               ppir_debug("Adding dep %d for write %d\n", node->index, later_write->index);
               ppir_node_add_dep(later_write, node, ppir_dep_write_after_read);
            }

            /* Sources are handled before the destination, so a node that
             * both reads and writes the register is checked against the
             * previous writer rather than against itself */
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register && dest->reg == reg)
               later_write = node;
         }
      }
   }
}
875 
ppir_compile_nir(struct lima_fs_shader_state * prog,struct nir_shader * nir,struct ra_regs * ra,struct pipe_debug_callback * debug)876 bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
877                       struct ra_regs *ra,
878                       struct pipe_debug_callback *debug)
879 {
880    nir_function_impl *func = nir_shader_get_entrypoint(nir);
881    ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
882    if (!comp)
883       return false;
884 
885    comp->ra = ra;
886    comp->uses_discard = nir->info.fs.uses_discard;
887 
888    /* 1st pass: create ppir blocks */
889    nir_foreach_function(function, nir) {
890       if (!function->impl)
891          continue;
892 
893       nir_foreach_block(nblock, function->impl) {
894          ppir_block *block = ppir_block_create(comp);
895          if (!block)
896             return false;
897          block->index = nblock->index;
898          _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
899       }
900    }
901 
902    /* 2nd pass: populate successors */
903    nir_foreach_function(function, nir) {
904       if (!function->impl)
905          continue;
906 
907       nir_foreach_block(nblock, function->impl) {
908          ppir_block *block = ppir_get_block(comp, nblock);
909          assert(block);
910 
911          for (int i = 0; i < 2; i++) {
912             if (nblock->successors[i])
913                block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
914          }
915       }
916    }
917 
918    /* Validate outputs, we support only gl_FragColor */
919    nir_foreach_shader_out_variable(var, nir) {
920       switch (var->data.location) {
921       case FRAG_RESULT_COLOR:
922       case FRAG_RESULT_DATA0:
923          break;
924       default:
925          ppir_error("unsupported output type\n");
926          goto err_out0;
927          break;
928       }
929    }
930 
931    foreach_list_typed(nir_register, reg, node, &func->registers) {
932       ppir_reg *r = rzalloc(comp, ppir_reg);
933       if (!r)
934          return false;
935 
936       r->index = reg->index;
937       r->num_components = reg->num_components;
938       r->is_head = false;
939       list_addtail(&r->list, &comp->reg_list);
940    }
941 
942    if (!ppir_emit_cf_list(comp, &func->body))
943       goto err_out0;
944 
945    /* If we have discard block add it to the very end */
946    if (comp->discard_block)
947       list_addtail(&comp->discard_block->list, &comp->block_list);
948 
949    ppir_node_print_prog(comp);
950 
951    if (!ppir_lower_prog(comp))
952       goto err_out0;
953 
954    ppir_add_ordering_deps(comp);
955    ppir_add_write_after_read_deps(comp);
956 
957    ppir_node_print_prog(comp);
958 
959    if (!ppir_node_to_instr(comp))
960       goto err_out0;
961 
962    if (!ppir_schedule_prog(comp))
963       goto err_out0;
964 
965    if (!ppir_regalloc_prog(comp))
966       goto err_out0;
967 
968    if (!ppir_codegen_prog(comp))
969       goto err_out0;
970 
971    ppir_print_shader_db(nir, comp, debug);
972 
973    _mesa_hash_table_u64_destroy(comp->blocks, NULL);
974    ralloc_free(comp);
975    return true;
976 
977 err_out0:
978    _mesa_hash_table_u64_destroy(comp->blocks, NULL);
979    ralloc_free(comp);
980    return false;
981 }
982 
983