1 /*
2  * Copyright (c) 2017 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "util/ralloc.h"
26 #include "util/half_float.h"
27 #include "util/bitscan.h"
28 
29 #include "ppir.h"
30 #include "codegen.h"
31 #include "lima_context.h"
32 
/*
 * Pack a four-entry swizzle into consecutive 2-bit lanes.
 *
 * Each entry has `shift` added and is wrapped to two bits; entry i is placed
 * in lane (i + dest_shift) of the returned value.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int bit_pos = dest_shift * 2;

   for (int lane = 0; lane < 4; lane++, bit_pos += 2) {
      unsigned comp = (swizzle[lane] + shift) & 0x3;
      packed |= comp << bit_pos;
   }

   return packed;
}
40 
/*
 * Scalar register index for one component of a source: the source's base
 * register index plus the swizzle entry chosen for `component`.
 */
static int get_scl_reg_index(ppir_src *src, int component)
{
   return ppir_target_get_src_reg_index(src) + src->swizzle[component];
}
47 
/*
 * Encode the varying-slot field for a load node into `code`.
 *
 * ppir_op_load_coords_reg is written through the register-source layout
 * (f->reg); every other accepted load op is written through the immediate
 * layout (f->imm).
 */
static void ppir_codegen_encode_varying(ppir_node *node, void *code)
{
   ppir_codegen_field_varying *f = code;
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_dest *dest = &load->dest;
   int dest_index = ppir_target_get_dest_reg_index(dest);
   int num_components = load->num_components;

   if (node->op == ppir_op_load_coords_reg) {
      f->reg.dest = dest_index >> 2;
      f->reg.mask = dest->write_mask << (dest_index & 0x3);

      /* Nothing else to fill in when the load has no source. */
      if (!load->num_src)
         return;

      /* num_components == 3 implies cubemap as we don't support 3D textures */
      if (num_components == 3) {
         f->reg.source_type = 2;
         f->reg.perspective = 1;
      } else {
         f->reg.source_type = 1;
      }

      ppir_src *src = &load->src;
      int src_index = ppir_target_get_src_reg_index(src);
      f->reg.source = src_index >> 2;
      f->reg.negate = src->negate;
      f->reg.absolute = src->absolute;
      f->reg.swizzle = encode_swizzle(src->swizzle, src_index & 0x3, 0);
      return;
   }

   assert(node->op == ppir_op_load_varying ||
          node->op == ppir_op_load_coords ||
          node->op == ppir_op_load_fragcoord ||
          node->op == ppir_op_load_pointcoord ||
          node->op == ppir_op_load_frontface);

   f->imm.dest = dest_index >> 2;
   f->imm.mask = dest->write_mask << (dest_index & 0x3);

   /* Three components use alignment code 3; otherwise num_components - 1. */
   int alignment = num_components == 3 ? 3 : num_components - 1;
   f->imm.alignment = alignment;

   if (load->num_src) {
      int offset_index = ppir_target_get_src_reg_index(&load->src);
      f->imm.offset_vector = offset_index >> 2;
      f->imm.offset_scalar = offset_index & 0x3;
   } else {
      /* No offset source: offset_vector is set to 0xf. */
      f->imm.offset_vector = 0xf;
   }

   /* Alignment code 3 scales the index like code 2 does. */
   f->imm.index = load->index >> (alignment == 3 ? 2 : alignment);

   if (node->op == ppir_op_load_fragcoord) {
      f->imm.source_type = 2;
      f->imm.perspective = 3;
   } else if (node->op == ppir_op_load_pointcoord) {
      f->imm.source_type = 3;
   } else if (node->op == ppir_op_load_frontface) {
      f->imm.source_type = 3;
      f->imm.perspective = 1;
   } else if (node->op == ppir_op_load_coords) {
      /* num_components == 3 implies cubemap as we don't support 3D textures */
      f->imm.source_type = num_components == 3 ? 2 : 0;
   }
}
122 
/*
 * Encode the sampler field for a texture load node into `code`.
 *
 * The sampler type is only written for 2D/RECT/EXTERNAL and CUBE sampler
 * dimensions; any other dimension leaves f->type untouched.
 */
static void ppir_codegen_encode_texld(ppir_node *node, void *code)
{
   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
   ppir_codegen_field_sampler *f = code;

   f->index = ldtex->sampler;

   f->lod_bias_en = ldtex->lod_bias_en;
   f->explicit_lod = ldtex->explicit_lod;
   /* The LOD bias register is taken from the second source. */
   if (ldtex->lod_bias_en)
      f->lod_bias = ppir_target_get_src_reg_index(&ldtex->src[1]);

   if (ldtex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      f->type = ppir_codegen_sampler_type_cube;
   } else if (ldtex->sampler_dim == GLSL_SAMPLER_DIM_2D ||
              ldtex->sampler_dim == GLSL_SAMPLER_DIM_RECT ||
              ldtex->sampler_dim == GLSL_SAMPLER_DIM_EXTERNAL) {
      f->type = ppir_codegen_sampler_type_2d;
   }

   f->offset_en = 0;
   f->unknown_2 = 0x39001;
}
151 
/*
 * Encode the uniform-slot field for a uniform or temporary load into `code`.
 *
 * Only ppir_op_load_uniform and ppir_op_load_temp are expected; any other op
 * trips the assertion.
 */
static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
{
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_codegen_field_uniform *f = code;

   if (node->op == ppir_op_load_uniform)
      f->source = ppir_codegen_uniform_src_uniform;
   else if (node->op == ppir_op_load_temp)
      f->source = ppir_codegen_uniform_src_temporary;
   else
      assert(0);

   /* Uniforms are always aligned to vec4 boundary */
   f->alignment = 2;
   f->index = load->index;

   /* An optional source supplies the offset register. */
   if (load->num_src) {
      f->offset_en = 1;
      f->offset_reg = ppir_target_get_src_reg_index(&load->src);
   }
}
177 
/*
 * Map a shift amount in [-3, 3] to an op code: non-negative shifts map to
 * themselves, negative shifts map to shift + 8 (so -1 -> 7, -3 -> 5).
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
183 
/*
 * Encode an ALU node into the vec4 multiply slot field at `code`.
 *
 * When the destination is a pipeline target, dest and mask are not written
 * and the swizzles use a lane shift of zero. Ops that are not listed leave
 * f->op untouched.
 */
static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;
   int dest_shift = 0;

   if (dest->type != ppir_target_pipeline) {
      int dest_index = ppir_target_get_dest_reg_index(dest);
      dest_shift = dest_index & 0x3;
      f->dest = dest_index >> 2;
      f->mask = dest->write_mask << dest_shift;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   /* For mul the op field carries the encoded shift amount. */
   case ppir_op_mul: f->op = shift_to_op(alu->shift);         break;
   case ppir_op_mov: f->op = ppir_codegen_vec4_mul_op_mov;    break;
   case ppir_op_max: f->op = ppir_codegen_vec4_mul_op_max;    break;
   case ppir_op_min: f->op = ppir_codegen_vec4_mul_op_min;    break;
   case ppir_op_and: f->op = ppir_codegen_vec4_mul_op_and;    break;
   case ppir_op_or:  f->op = ppir_codegen_vec4_mul_op_or;     break;
   case ppir_op_xor: f->op = ppir_codegen_vec4_mul_op_xor;    break;
   case ppir_op_gt:  f->op = ppir_codegen_vec4_mul_op_gt;     break;
   case ppir_op_ge:  f->op = ppir_codegen_vec4_mul_op_ge;     break;
   case ppir_op_eq:  f->op = ppir_codegen_vec4_mul_op_eq;     break;
   case ppir_op_ne:  f->op = ppir_codegen_vec4_mul_op_ne;     break;
   case ppir_op_not: f->op = ppir_codegen_vec4_mul_op_not;    break;
   default:
      break;
   }

   /* First operand is always encoded. */
   ppir_src *arg0 = &alu->src[0];
   int arg0_index = ppir_target_get_src_reg_index(arg0);
   f->arg0_source = arg0_index >> 2;
   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, dest_shift);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   /* Second operand only for two-source nodes. */
   if (alu->num_src == 2) {
      ppir_src *arg1 = &alu->src[1];
      int arg1_index = ppir_target_get_src_reg_index(arg1);
      f->arg1_source = arg1_index >> 2;
      f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, dest_shift);
      f->arg1_absolute = arg1->absolute;
      f->arg1_negate = arg1->negate;
   }
}
256 
/*
 * Encode an ALU node into the scalar multiply slot field at `code`.
 *
 * The written component is the lowest bit set in the destination write mask.
 * A pipeline destination leaves dest/output_en untouched. Ops that are not
 * listed leave f->op untouched.
 */
static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_float_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   /* ffs() is 1-based and returns 0 for an empty mask. */
   int dest_component = ffs(dest->write_mask) - 1;
   assert(dest_component >= 0);

   if (dest->type != ppir_target_pipeline) {
      f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
      f->output_en = true;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   /* For mul the op field carries the encoded shift amount. */
   case ppir_op_mul: f->op = shift_to_op(alu->shift);          break;
   case ppir_op_mov: f->op = ppir_codegen_float_mul_op_mov;    break;
   case ppir_op_max: f->op = ppir_codegen_float_mul_op_max;    break;
   case ppir_op_min: f->op = ppir_codegen_float_mul_op_min;    break;
   case ppir_op_and: f->op = ppir_codegen_float_mul_op_and;    break;
   case ppir_op_or:  f->op = ppir_codegen_float_mul_op_or;     break;
   case ppir_op_xor: f->op = ppir_codegen_float_mul_op_xor;    break;
   case ppir_op_gt:  f->op = ppir_codegen_float_mul_op_gt;     break;
   case ppir_op_ge:  f->op = ppir_codegen_float_mul_op_ge;     break;
   case ppir_op_eq:  f->op = ppir_codegen_float_mul_op_eq;     break;
   case ppir_op_ne:  f->op = ppir_codegen_float_mul_op_ne;     break;
   case ppir_op_not: f->op = ppir_codegen_float_mul_op_not;    break;
   default:
      break;
   }

   ppir_src *arg0 = &alu->src[0];
   f->arg0_source = get_scl_reg_index(arg0, dest_component);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   /* Second operand only for two-source nodes. */
   if (alu->num_src == 2) {
      ppir_src *arg1 = &alu->src[1];
      f->arg1_source = get_scl_reg_index(arg1, dest_component);
      f->arg1_absolute = arg1->absolute;
      f->arg1_negate = arg1->negate;
   }
}
325 
/*
 * Encode an ALU node into the vec4 add (accumulate) slot field at `code`.
 *
 * For select, the operands are taken starting at the second source. A first
 * operand coming from the vmul pipeline register sets mul_in instead of an
 * arg0 register. Ops that are not listed leave f->op untouched.
 */
static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   int dest_index = ppir_target_get_dest_reg_index(dest);
   int dest_shift = dest_index & 0x3;
   f->dest = dest_index >> 2;
   f->mask = dest->write_mask << dest_shift;
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_sum3:
      f->op = ppir_codegen_vec4_acc_op_sum3;
      /* sum3/sum4 encode their source swizzles without the lane shift. */
      dest_shift = 0;
      break;
   case ppir_op_sum4:
      f->op = ppir_codegen_vec4_acc_op_sum4;
      dest_shift = 0;
      break;
   case ppir_op_add:    f->op = ppir_codegen_vec4_acc_op_add;   break;
   case ppir_op_mov:    f->op = ppir_codegen_vec4_acc_op_mov;   break;
   case ppir_op_floor:  f->op = ppir_codegen_vec4_acc_op_floor; break;
   case ppir_op_ceil:   f->op = ppir_codegen_vec4_acc_op_ceil;  break;
   case ppir_op_fract:  f->op = ppir_codegen_vec4_acc_op_fract; break;
   case ppir_op_gt:     f->op = ppir_codegen_vec4_acc_op_gt;    break;
   case ppir_op_ge:     f->op = ppir_codegen_vec4_acc_op_ge;    break;
   case ppir_op_eq:     f->op = ppir_codegen_vec4_acc_op_eq;    break;
   case ppir_op_ne:     f->op = ppir_codegen_vec4_acc_op_ne;    break;
   case ppir_op_select: f->op = ppir_codegen_vec4_acc_op_sel;   break;
   case ppir_op_max:    f->op = ppir_codegen_vec4_acc_op_max;   break;
   case ppir_op_min:    f->op = ppir_codegen_vec4_acc_op_min;   break;
   case ppir_op_ddx:    f->op = ppir_codegen_vec4_acc_op_dFdx;  break;
   case ppir_op_ddy:    f->op = ppir_codegen_vec4_acc_op_dFdy;  break;
   default:
      break;
   }

   /* select skips its first source when picking the encoded operands. */
   ppir_src *arg0 = &alu->src[node->op == ppir_op_select ? 1 : 0];
   ppir_src *arg1 = arg0 + 1;
   int arg0_index = ppir_target_get_src_reg_index(arg0);

   bool from_vmul = arg0->type == ppir_target_pipeline &&
                    arg0->pipeline == ppir_pipeline_reg_vmul;
   if (from_vmul)
      f->mul_in = true;
   else
      f->arg0_source = arg0_index >> 2;

   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, dest_shift);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   /* Encode the following source only if the node actually has one. */
   if (arg1 < alu->src + alu->num_src) {
      int arg1_index = ppir_target_get_src_reg_index(arg1);
      f->arg1_source = arg1_index >> 2;
      f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, dest_shift);
      f->arg1_absolute = arg1->absolute;
      f->arg1_negate = arg1->negate;
   }
}
414 
/*
 * Encode an ALU node into the scalar add (accumulate) slot field at `code`.
 *
 * The written component is the lowest bit set in the destination write mask.
 * For select, the operands are taken starting at the second source. A first
 * operand coming from the fmul pipeline register sets mul_in instead of an
 * arg0 register. Ops that are not listed leave f->op untouched.
 */
static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
{
   ppir_codegen_field_float_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   /* ffs() is 1-based and returns 0 for an empty mask. */
   int dest_component = ffs(dest->write_mask) - 1;
   assert(dest_component >= 0);

   f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
   f->output_en = true;
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   /* For add the op field carries the encoded shift amount. */
   case ppir_op_add:    f->op = shift_to_op(alu->shift);          break;
   case ppir_op_mov:    f->op = ppir_codegen_float_acc_op_mov;    break;
   case ppir_op_max:    f->op = ppir_codegen_float_acc_op_max;    break;
   case ppir_op_min:    f->op = ppir_codegen_float_acc_op_min;    break;
   case ppir_op_floor:  f->op = ppir_codegen_float_acc_op_floor;  break;
   case ppir_op_ceil:   f->op = ppir_codegen_float_acc_op_ceil;   break;
   case ppir_op_fract:  f->op = ppir_codegen_float_acc_op_fract;  break;
   case ppir_op_gt:     f->op = ppir_codegen_float_acc_op_gt;     break;
   case ppir_op_ge:     f->op = ppir_codegen_float_acc_op_ge;     break;
   case ppir_op_eq:     f->op = ppir_codegen_float_acc_op_eq;     break;
   case ppir_op_ne:     f->op = ppir_codegen_float_acc_op_ne;     break;
   case ppir_op_select: f->op = ppir_codegen_float_acc_op_sel;    break;
   case ppir_op_ddx:    f->op = ppir_codegen_float_acc_op_dFdx;   break;
   case ppir_op_ddy:    f->op = ppir_codegen_float_acc_op_dFdy;   break;
   default:
      break;
   }

   /* select skips its first source when picking the encoded operands. */
   ppir_src *arg0 = &alu->src[node->op == ppir_op_select ? 1 : 0];
   ppir_src *arg1 = arg0 + 1;

   bool from_fmul = arg0->type == ppir_target_pipeline &&
                    arg0->pipeline == ppir_pipeline_reg_fmul;
   if (from_fmul)
      f->mul_in = true;
   else
      f->arg0_source = get_scl_reg_index(arg0, dest_component);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   /* Encode the following source only if the node actually has one. */
   if (arg1 < alu->src + alu->num_src) {
      f->arg1_source = get_scl_reg_index(arg1, dest_component);
      f->arg1_absolute = arg1->absolute;
      f->arg1_negate = arg1->negate;
   }
}
490 
/*
 * Encode a scalar special-function node (rsqrt, log2, exp2, rcp, sqrt, sin,
 * cos) into the combine slot field at `code`.
 *
 * Any other op leaves the field untouched.
 */
static void ppir_codegen_encode_combine(ppir_node *node, void *code)
{
   ppir_codegen_field_combine *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   int scalar_op;

   /* Translate the op first; unsupported ops write nothing at all. */
   switch (node->op) {
   case ppir_op_rsqrt: scalar_op = ppir_codegen_combine_scalar_op_rsqrt; break;
   case ppir_op_log2:  scalar_op = ppir_codegen_combine_scalar_op_log2;  break;
   case ppir_op_exp2:  scalar_op = ppir_codegen_combine_scalar_op_exp2;  break;
   case ppir_op_rcp:   scalar_op = ppir_codegen_combine_scalar_op_rcp;   break;
   case ppir_op_sqrt:  scalar_op = ppir_codegen_combine_scalar_op_sqrt;  break;
   case ppir_op_sin:   scalar_op = ppir_codegen_combine_scalar_op_sin;   break;
   case ppir_op_cos:   scalar_op = ppir_codegen_combine_scalar_op_cos;   break;
   default:
      return;
   }

   f->scalar.dest_vec = false;
   f->scalar.arg1_en = false;

   /* The written component is the lowest bit set in the write mask. */
   ppir_dest *dest = &alu->dest;
   int dest_component = ffs(dest->write_mask) - 1;
   assert(dest_component >= 0);
   f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
   f->scalar.dest_modifier = dest->modifier;

   ppir_src *arg0 = &alu->src[0];
   f->scalar.arg0_src = get_scl_reg_index(arg0, dest_component);
   f->scalar.arg0_absolute = arg0->absolute;
   f->scalar.arg0_negate = arg0->negate;

   f->scalar.op = scalar_op;
}
549 
/*
 * Encode a ppir_op_store_temp node into the temporary-write field at `code`.
 */
static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_store_temp);

   ppir_store_node *store = ppir_node_to_store(node);
   ppir_codegen_field_temp_write *f = code;
   int num_components = store->num_components;

   /* Four components use alignment code 2; otherwise num_components - 1. */
   int alignment;
   if (num_components == 4)
      alignment = 2;
   else
      alignment = num_components - 1;

   f->temp_write.dest = 0x03; /* 11 - temporary */
   f->temp_write.source = store->src.reg->index;
   f->temp_write.alignment = alignment;
   /* The index is scaled up as the alignment code gets smaller. */
   f->temp_write.index = store->index << (2 - alignment);
   f->temp_write.offset_reg = store->index >> 2;
}
567 
/*
 * Convert the constant->num float values of `constant` to half floats and
 * store them consecutively into `code`.
 */
static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
{
   uint16_t *out = code;

   for (int i = 0; i < constant->num; i++, out++)
      *out = _mesa_float_to_half(constant->value[i].f);
}
573 
/*
 * Encode a ppir_op_discard node: the branch field is filled with the three
 * fixed PPIR_CODEGEN_DISCARD_WORD* values.
 */
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_discard);

   ppir_codegen_field_branch *field = code;

   field->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   field->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   field->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}
583 
/*
 * Encode the branch slot field at `code`.
 *
 * Discard nodes are delegated to ppir_codegen_encode_discard(). A real
 * branch has either two sources (conditional) or none (unconditional); the
 * target is encoded relative to the branching instruction's own offset.
 */
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   assert(node->op == ppir_op_branch);

   ppir_codegen_field_branch *b = code;
   ppir_branch_node *branch = ppir_node_to_branch(node);

   b->branch.unknown_0 = 0x0;
   b->branch.unknown_1 = 0x0;

   switch (branch->num_src) {
   case 2:
      b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
      b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
      b->branch.cond_gt = branch->cond_gt;
      b->branch.cond_eq = branch->cond_eq;
      b->branch.cond_lt = branch->cond_lt;
      break;
   case 0:
      /* Unconditional branch */
      b->branch.arg0_source = 0;
      b->branch.arg1_source = 0;
      b->branch.cond_gt = true;
      b->branch.cond_eq = true;
      b->branch.cond_lt = true;
      break;
   default:
      assert(false);
      break;
   }

   /* Walk past blocks with no instructions until one has some, or until
    * there is no next list node to follow. */
   ppir_block *target = branch->target;
   while (list_is_empty(&target->instr_list) && target->list.next)
      target = LIST_ENTRY(ppir_block, target->list.next, list);

   assert(!list_is_empty(&target->instr_list));

   ppir_instr *first = list_first_entry(&target->instr_list, ppir_instr, list);
   b->branch.target = first->offset - node->instr->offset;
   b->branch.next_count = first->encode_size;
}
631 
632 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
633 
634 static const ppir_codegen_instr_slot_encode_func
635 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
636    [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
637    [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
638    [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
639    [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
640    [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
641    [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
642    [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
643    [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
644    [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
645    [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
646 };
647 
/*
 * Size in bits of each slot's encoded field, indexed by slot number (the
 * same index used for instr->slots[] and ppir_codegen_encode_slot[]).
 */
static const int ppir_codegen_field_size[] = {
   34, /* slot 0 */
   62, /* slot 1 */
   41, /* slot 2 */
   43, /* slot 3 */
   30, /* slot 4 */
   44, /* slot 5 */
   31, /* slot 6 */
   30, /* slot 7 */
   41, /* slot 8 */
   73, /* slot 9 */
};
651 
/* Number of 32-bit words needed to hold `size` bits (rounded up). */
static inline int align_to_word(int size)
{
   int rounded = size + 0x1f;

   return rounded >> 5;
}
656 
/*
 * Compute the encoded size of an instruction in 32-bit words: the bit sizes
 * of all occupied slots plus 64 bits per used constant set, rounded up to
 * whole words, plus one extra word.
 */
static int get_instr_encode_size(ppir_instr *instr)
{
   int bits = 0;

   for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++)
      bits += instr->slots[slot] ? ppir_codegen_field_size[slot] : 0;

   for (int c = 0; c < 2; c++)
      bits += instr->constant[c].num ? 64 : 0;

   return align_to_word(bits) + 1;
}
673 
/*
 * Copy src_size bits from `src` into `dst` starting at bit dst_offset.
 *
 * Both buffers are accessed as arrays of 32-bit words. When dst_offset is
 * word-aligned the whole words covering src_size are copied with memcpy();
 * otherwise each source word is split across two destination words and
 * OR-ed in, so destination bits that are already set are kept.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   uint32_t *out = dst;
   uint32_t *in = src;
   int lo_shift = dst_offset & 0x1f;

   out += (dst_offset >> 5);

   if (!lo_shift) {
      memcpy(out, in, align_to_word(src_size) * 4);
      return;
   }

   int hi_shift = 32 - lo_shift;
   for (int copied = 0; ; in++) {
      /* Low part of the source word fills the rest of the current word. */
      *out++ |= *in << lo_shift;
      copied += hi_shift;
      if (copied >= src_size)
         break;

      /* High part of the source word starts the next destination word. */
      *out |= *in >> hi_shift;
      copied += lo_shift;
      if (copied >= src_size)
         break;
   }
}
703 
/*
 * Encode one instruction at `code` and return its size in 32-bit words.
 *
 * The control structure sits at `code`; slot fields and constants are
 * bit-packed right after it. If `last_code` is non-NULL it is treated as the
 * previous instruction's control structure and gets its next_count and
 * prefetch set for this instruction.
 */
static int encode_instr(ppir_instr *instr, void *code, void *last_code)
{
   ppir_codegen_ctrl *ctrl = code;
   void *payload = ctrl + 1;
   int bits = 0;

   for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++) {
      ppir_node *node = instr->slots[slot];
      if (!node)
         continue;

      /* max field size (73), align to dword */
      uint8_t output[12] = {0};

      ppir_codegen_encode_slot[slot](node, output);
      bitcopy(payload, bits, output, ppir_codegen_field_size[slot]);

      bits += ppir_codegen_field_size[slot];
      ctrl->fields |= 1 << slot;
   }

   /* Texture loads and derivative ops set the sync flag. */
   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
      ctrl->sync = true;

   ppir_node *vec_add = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
   if (vec_add && (vec_add->op == ppir_op_ddx || vec_add->op == ppir_op_ddy))
      ctrl->sync = true;

   ppir_node *scl_add = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
   if (scl_add && (scl_add->op == ppir_op_ddx || scl_add->op == ppir_op_ddy))
      ctrl->sync = true;

   for (int c = 0; c < 2; c++) {
      if (!instr->constant[c].num)
         continue;

      uint16_t output[4] = {0};

      ppir_codegen_encode_const(instr->constant + c, output);
      bitcopy(payload, bits, output, instr->constant[c].num * 16);

      /* A constant set always occupies 64 bits, however many are used. */
      bits += 64;
      ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + c);
   }

   int words = align_to_word(bits) + 1;

   ctrl->count = words;
   if (instr->is_end)
      ctrl->stop = true;

   if (last_code) {
      ppir_codegen_ctrl *last_ctrl = last_code;
      last_ctrl->next_count = words;
      last_ctrl->prefetch = true;
   }

   return words;
}
763 
/*
 * Dump the encoded shader to stdout: for every instruction, its index and
 * offset, its raw words in hex (six per line), and its disassembly.
 */
static void ppir_codegen_print_prog(ppir_compiler *comp)
{
   uint32_t *cursor = comp->prog->shader;
   unsigned word_offset = 0;

   printf("========ppir codegen========\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         /* The low 5 bits of the first word give the number of words. */
         int num_words = cursor[0] & 0x1f;

         printf("%03d (@%6d): ", instr->index, instr->offset);
         for (int w = 0; w < num_words; w++) {
            if (w && w % 6 == 0)
               printf("\n    ");
            printf("%08x ", cursor[w]);
         }
         printf("\n");
         ppir_disassemble_instr(cursor, word_offset);

         cursor += num_words;
         word_offset += num_words;
      }
   }
   printf("-----------------------\n");
}
787 
/*
 * Generate the binary program for `comp`.
 *
 * A first pass assigns every instruction its word offset and encoded size;
 * a second pass encodes the instructions back to back into a zeroed buffer
 * allocated under comp->prog. Any previous comp->prog->shader is freed and
 * replaced. Returns false if the buffer allocation fails, true otherwise.
 */
bool ppir_codegen_prog(ppir_compiler *comp)
{
   /* Pass 1: lay out the instructions. */
   int total_words = 0;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         instr->offset = total_words;
         instr->encode_size = get_instr_encode_size(instr);
         total_words += instr->encode_size;
      }
   }

   uint32_t *prog = rzalloc_size(comp->prog, total_words * sizeof(uint32_t));
   if (prog == NULL)
      return false;

   /* Pass 2: encode, handing each instruction the one before it. */
   uint32_t *cursor = prog;
   uint32_t *prev = NULL;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         int words = encode_instr(instr, cursor, prev);
         prev = cursor;
         cursor += words;
      }
   }

   if (comp->prog->shader)
      ralloc_free(comp->prog->shader);

   comp->prog->shader = prog;
   comp->prog->shader_size = total_words * sizeof(uint32_t);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_codegen_print_prog(comp);

   return true;
}
823