1 /*
2  * Copyright (c) 2012-2015 Etnaviv Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Wladimir J. van der Laan <laanwj@gmail.com>
25  */
26 
27 /* TGSI->Vivante shader ISA conversion */
28 
29 /* What does the compiler return (see etna_shader_object)?
30  *  1) instruction data
31  *  2) input-to-temporary mapping (fixed for ps)
32  *      *) in case of ps, semantic -> varying id mapping
33  *      *) for each varying: number of components used (r, rg, rgb, rgba)
34  *  3) temporary-to-output mapping (in case of vs, fixed for ps)
35  *  4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36  *  5) immediates base offset, immediates data
37  *  6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38  *     configure the hw, but useful for error checking
39  *  7) enough information to add the z=(z+w)/2.0 necessary for older chips
40  *     (output reg id is enough)
41  *
42  *  Empty shaders are not allowed, should always at least generate a NOP. Also
43  *  if there is a label at the end of the shader, an extra NOP should be
44  *  generated as jump target.
45  *
46  * TODO
47  * * Use an instruction scheduler
48  * * Indirect access to uniforms / temporaries using amode
49  */
50 
51 #include "etnaviv_compiler.h"
52 
53 #include "etnaviv_asm.h"
54 #include "etnaviv_context.h"
55 #include "etnaviv_debug.h"
56 #include "etnaviv_uniforms.h"
57 #include "etnaviv_util.h"
58 
59 #include "nir/tgsi_to_nir.h"
60 #include "pipe/p_shader_tokens.h"
61 #include "tgsi/tgsi_info.h"
62 #include "tgsi/tgsi_iterate.h"
63 #include "tgsi/tgsi_lowering.h"
64 #include "tgsi/tgsi_strings.h"
65 #include "tgsi/tgsi_util.h"
66 #include "util/u_math.h"
67 #include "util/u_memory.h"
68 
69 #include <fcntl.h>
70 #include <stdio.h>
71 #include <sys/stat.h>
72 #include <sys/types.h>
73 
74 #define ETNA_MAX_INNER_TEMPS 2
75 
76 static const float sincos_const[2][4] = {
77    {
78       2., -1., 4., -4.,
79    },
80    {
81       1. / (2. * M_PI), 0.75, 0.5, 0.0,
82    },
83 };
84 
/* Compact description of a native (hardware) register */
struct etna_native_reg {
   unsigned valid : 1;  /* set when this describes an assigned register */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* register group, an INST_RGROUP_* value */
   unsigned id : 9;     /* register number within the group */
};
92 
93 /* Register description */
94 struct etna_reg_desc {
95    enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
96    int idx; /* index into file */
97    bool active; /* used in program */
98    int first_use; /* instruction id of first use (scope begin) */
99    int last_use; /* instruction id of last use (scope end, inclusive) */
100 
101    struct etna_native_reg native; /* native register to map to */
102    unsigned usage_mask : 4; /* usage, per channel */
103    bool has_semantic; /* register has associated TGSI semantic */
104    struct tgsi_declaration_semantic semantic; /* TGSI semantic */
105    struct tgsi_declaration_interp interp; /* Interpolation type */
106 };
107 
/* A branch target */
struct etna_compile_label {
   int inst_idx; /* id of the instruction the label refers to */
};
112 
/* Kind of control-flow construct a nesting frame represents */
enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF,   /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP, /* loop construct */
};
117 
118 /* nesting scope frame (LOOP, IF, ...) during compilation
119  */
120 struct etna_compile_frame {
121    enum etna_compile_frame_type type;
122    int lbl_else_idx;
123    int lbl_endif_idx;
124    int lbl_loop_bgn_idx;
125    int lbl_loop_end_idx;
126 };
127 
/* View onto the register descriptions belonging to one TGSI file */
struct etna_compile_file {
   size_t reg_size;           /* number of registers (max register + 1) */
   struct etna_reg_desc *reg; /* descriptions, indexed by register index */
};
134 
/* Append val to the growable array arr. Expects companion variables
 * arr_count (elements in use) and arr_sz (allocated capacity) to exist next
 * to arr; capacity doubles, starting at 16. The realloc() result is used
 * unchecked. */
#define array_insert(arr, val)                             \
   do {                                                    \
      if (arr##_sz == arr##_count) {                       \
         arr##_sz = MAX2(16, arr##_sz * 2);                \
         arr = realloc(arr, sizeof(*arr) * arr##_sz);      \
      }                                                    \
      arr[arr##_count] = val;                              \
      arr##_count++;                                       \
   } while (0)
143 
144 
145 /* scratch area for compiling shader, freed after compilation finishes */
146 struct etna_compile {
147    const struct tgsi_token *tokens;
148    bool free_tokens;
149 
150    struct tgsi_shader_info info;
151 
152    /* Register descriptions, per TGSI file, per register index */
153    struct etna_compile_file file[TGSI_FILE_COUNT];
154 
155    /* Keep track of TGSI register declarations */
156    struct etna_reg_desc decl[ETNA_MAX_DECL];
157    uint total_decls;
158 
159    /* Bitmap of dead instructions which are removed in a separate pass */
160    bool dead_inst[ETNA_MAX_TOKENS];
161 
162    /* Immediate data */
163    enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
164    uint32_t imm_data[ETNA_MAX_IMM];
165    uint32_t imm_base; /* base of immediates (in 32 bit units) */
166    uint32_t imm_size; /* size of immediates (in 32 bit units) */
167 
168    /* Next free native register, for register allocation */
169    uint32_t next_free_native;
170 
171    /* Temporary register for use within translated TGSI instruction,
172     * only allocated when needed.
173     */
174    int inner_temps; /* number of inner temps used; only up to one available at
175                        this point */
176    struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];
177 
178    /* Fields for handling nested conditionals */
179    struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
180    int frame_sp;
181    int lbl_usage[ETNA_MAX_INSTRUCTIONS];
182 
183    unsigned labels_count, labels_sz;
184    struct etna_compile_label *labels;
185 
186    unsigned num_loops;
187 
188    /* Code generation */
189    int inst_ptr; /* current instruction pointer */
190    uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];
191 
192    /* I/O */
193 
194    /* Number of varyings (PS only) */
195    int num_varyings;
196 
197    /* GPU hardware specs */
198    const struct etna_specs *specs;
199 
200    const struct etna_shader_key *key;
201 };
202 
203 static struct etna_reg_desc *
etna_get_dst_reg(struct etna_compile * c,struct tgsi_dst_register dst)204 etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
205 {
206    return &c->file[dst.File].reg[dst.Index];
207 }
208 
209 static struct etna_reg_desc *
etna_get_src_reg(struct etna_compile * c,struct tgsi_src_register src)210 etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
211 {
212    return &c->file[src.File].reg[src.Index];
213 }
214 
215 static struct etna_native_reg
etna_native_temp(unsigned reg)216 etna_native_temp(unsigned reg)
217 {
218    return (struct etna_native_reg) {
219       .valid = 1,
220       .rgroup = INST_RGROUP_TEMP,
221       .id = reg
222    };
223 }
224 
225 static struct etna_native_reg
etna_native_internal(unsigned reg)226 etna_native_internal(unsigned reg)
227 {
228    return (struct etna_native_reg) {
229       .valid = 1,
230       .rgroup = INST_RGROUP_INTERNAL,
231       .id = reg
232    };
233 }
234 
/** Register allocation **/

/* Sort criteria understood by sort_registers() */
enum reg_sort_order {
   FIRST_USE_ASC,  /* by first use, earliest first */
   FIRST_USE_DESC, /* by first use, latest first */
   LAST_USE_ASC,   /* by last use, earliest first */
   LAST_USE_DESC   /* by last use, latest first */
};
242 
/* Sortable handle: a register description plus the key it is ordered by */
struct sort_rec {
   struct etna_reg_desc *ptr; /* the register being sorted */
   int key;                   /* sort key; smaller keys sort first */
};
248 
249 static int
sort_rec_compar(const struct sort_rec * a,const struct sort_rec * b)250 sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
251 {
252    if (a->key < b->key)
253       return -1;
254 
255    if (a->key > b->key)
256       return 1;
257 
258    return 0;
259 }
260 
261 /* create an index on a register set based on certain criteria. */
262 static int
sort_registers(struct sort_rec * sorted,struct etna_compile_file * file,enum reg_sort_order so)263 sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
264                enum reg_sort_order so)
265 {
266    struct etna_reg_desc *regs = file->reg;
267    int ptr = 0;
268 
269    /* pre-populate keys from active registers */
270    for (int idx = 0; idx < file->reg_size; ++idx) {
271       /* only interested in active registers now; will only assign inactive ones
272        * if no space in active ones */
273       if (regs[idx].active) {
274          sorted[ptr].ptr = &regs[idx];
275 
276          switch (so) {
277          case FIRST_USE_ASC:
278             sorted[ptr].key = regs[idx].first_use;
279             break;
280          case LAST_USE_ASC:
281             sorted[ptr].key = regs[idx].last_use;
282             break;
283          case FIRST_USE_DESC:
284             sorted[ptr].key = -regs[idx].first_use;
285             break;
286          case LAST_USE_DESC:
287             sorted[ptr].key = -regs[idx].last_use;
288             break;
289          }
290          ptr++;
291       }
292    }
293 
294    /* sort index by key */
295    qsort(sorted, ptr, sizeof(struct sort_rec),
296          (int (*)(const void *, const void *))sort_rec_compar);
297 
298    return ptr;
299 }
300 
301 /* Allocate a new, unused, native temp register */
302 static struct etna_native_reg
alloc_new_native_reg(struct etna_compile * c)303 alloc_new_native_reg(struct etna_compile *c)
304 {
305    assert(c->next_free_native < ETNA_MAX_TEMPS);
306    return etna_native_temp(c->next_free_native++);
307 }
308 
309 /* assign TEMPs to native registers */
310 static void
assign_temporaries_to_native(struct etna_compile * c,struct etna_compile_file * file)311 assign_temporaries_to_native(struct etna_compile *c,
312                              struct etna_compile_file *file)
313 {
314    struct etna_reg_desc *temps = file->reg;
315 
316    for (int idx = 0; idx < file->reg_size; ++idx)
317       temps[idx].native = alloc_new_native_reg(c);
318 }
319 
320 /* assign inputs and outputs to temporaries
321  * Gallium assumes that the hardware has separate registers for taking input and
322  * output, however Vivante GPUs use temporaries both for passing in inputs and
323  * passing back outputs.
324  * Try to re-use temporary registers where possible. */
325 static void
assign_inouts_to_temporaries(struct etna_compile * c,uint file)326 assign_inouts_to_temporaries(struct etna_compile *c, uint file)
327 {
328    bool mode_inputs = (file == TGSI_FILE_INPUT);
329    int inout_ptr = 0, num_inouts;
330    int temp_ptr = 0, num_temps;
331    struct sort_rec inout_order[ETNA_MAX_TEMPS];
332    struct sort_rec temps_order[ETNA_MAX_TEMPS];
333    num_inouts = sort_registers(inout_order, &c->file[file],
334                                mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
335    num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
336                               mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);
337 
338    while (inout_ptr < num_inouts && temp_ptr < num_temps) {
339       struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
340       struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;
341 
342       if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
343          inout_ptr++;
344          continue;
345       }
346 
347       /* last usage of this input is before or in same instruction of first use
348        * of temporary? */
349       if (mode_inputs ? (inout->last_use <= temp->first_use)
350                       : (inout->first_use >= temp->last_use)) {
351          /* assign it and advance to next input */
352          inout->native = temp->native;
353          inout_ptr++;
354       }
355 
356       temp_ptr++;
357    }
358 
359    /* if we couldn't reuse current ones, allocate new temporaries */
360    for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
361       struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
362 
363       if (inout->active && !inout->native.valid)
364          inout->native = alloc_new_native_reg(c);
365    }
366 }
367 
368 /* Allocate an immediate with a certain value and return the index. If
369  * there is already an immediate with that value, return that.
370  */
371 static struct etna_inst_src
alloc_imm(struct etna_compile * c,enum etna_immediate_contents contents,uint32_t value)372 alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
373           uint32_t value)
374 {
375    int idx;
376 
377    /* Could use a hash table to speed this up */
378    for (idx = 0; idx < c->imm_size; ++idx) {
379       if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
380          break;
381    }
382 
383    /* look if there is an unused slot */
384    if (idx == c->imm_size) {
385       for (idx = 0; idx < c->imm_size; ++idx) {
386          if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
387             break;
388       }
389    }
390 
391    /* allocate new immediate */
392    if (idx == c->imm_size) {
393       assert(c->imm_size < ETNA_MAX_IMM);
394       idx = c->imm_size++;
395       c->imm_data[idx] = value;
396       c->imm_contents[idx] = contents;
397    }
398 
399    /* swizzle so that component with value is returned in all components */
400    idx += c->imm_base;
401    struct etna_inst_src imm_src = {
402       .use = 1,
403       .rgroup = INST_RGROUP_UNIFORM_0,
404       .reg = idx / 4,
405       .swiz = INST_SWIZ_BROADCAST(idx & 3)
406    };
407 
408    return imm_src;
409 }
410 
411 static struct etna_inst_src
alloc_imm_u32(struct etna_compile * c,uint32_t value)412 alloc_imm_u32(struct etna_compile *c, uint32_t value)
413 {
414    return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
415 }
416 
417 static struct etna_inst_src
alloc_imm_vec4u(struct etna_compile * c,enum etna_immediate_contents contents,const uint32_t * values)418 alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
419                 const uint32_t *values)
420 {
421    struct etna_inst_src imm_src = { };
422    int idx, i;
423 
424    for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
425       /* What if we can use a uniform with a different swizzle? */
426       for (i = 0; i < 4; i++)
427          if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
428             break;
429       if (i == 4)
430          break;
431    }
432 
433    if (idx + 3 >= c->imm_size) {
434       idx = align(c->imm_size, 4);
435       assert(idx + 4 <= ETNA_MAX_IMM);
436 
437       for (i = 0; i < 4; i++) {
438          c->imm_data[idx + i] = values[i];
439          c->imm_contents[idx + i] = contents;
440       }
441 
442       c->imm_size = idx + 4;
443    }
444 
445    assert((c->imm_base & 3) == 0);
446    idx += c->imm_base;
447    imm_src.use = 1;
448    imm_src.rgroup = INST_RGROUP_UNIFORM_0;
449    imm_src.reg = idx / 4;
450    imm_src.swiz = INST_SWIZ_IDENTITY;
451 
452    return imm_src;
453 }
454 
455 static uint32_t
get_imm_u32(struct etna_compile * c,const struct etna_inst_src * imm,unsigned swiz_idx)456 get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
457             unsigned swiz_idx)
458 {
459    assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
460    unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
461 
462    return c->imm_data[idx];
463 }
464 
465 /* Allocate immediate with a certain float value. If there is already an
466  * immediate with that value, return that.
467  */
468 static struct etna_inst_src
alloc_imm_f32(struct etna_compile * c,float value)469 alloc_imm_f32(struct etna_compile *c, float value)
470 {
471    return alloc_imm_u32(c, fui(value));
472 }
473 
474 static struct etna_inst_src
etna_imm_vec4f(struct etna_compile * c,const float * vec4)475 etna_imm_vec4f(struct etna_compile *c, const float *vec4)
476 {
477    uint32_t val[4];
478 
479    for (int i = 0; i < 4; i++)
480       val[i] = fui(vec4[i]);
481 
482    return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
483 }
484 
485 /* Pass -- check register file declarations and immediates */
486 static void
etna_compile_parse_declarations(struct etna_compile * c)487 etna_compile_parse_declarations(struct etna_compile *c)
488 {
489    struct tgsi_parse_context ctx = { };
490    ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
491    assert(status == TGSI_PARSE_OK);
492 
493    while (!tgsi_parse_end_of_tokens(&ctx)) {
494       tgsi_parse_token(&ctx);
495 
496       switch (ctx.FullToken.Token.Type) {
497       case TGSI_TOKEN_TYPE_IMMEDIATE: {
498          /* immediates are handled differently from other files; they are
499           * not declared explicitly, and always add four components */
500          const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
501          assert(c->imm_size <= (ETNA_MAX_IMM - 4));
502 
503          for (int i = 0; i < 4; ++i) {
504             unsigned idx = c->imm_size++;
505 
506             c->imm_data[idx] = imm->u[i].Uint;
507             c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
508          }
509       }
510       break;
511       }
512    }
513 
514    tgsi_parse_free(&ctx);
515 }
516 
517 /* Allocate register declarations for the registers in all register files */
518 static void
etna_allocate_decls(struct etna_compile * c)519 etna_allocate_decls(struct etna_compile *c)
520 {
521    uint idx = 0;
522 
523    for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
524       c->file[x].reg = &c->decl[idx];
525       c->file[x].reg_size = c->info.file_max[x] + 1;
526 
527       for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
528          c->decl[idx].file = x;
529          c->decl[idx].idx = sub;
530          idx++;
531       }
532    }
533 
534    c->total_decls = idx;
535 }
536 
537 /* Pass -- check and record usage of temporaries, inputs, outputs */
538 static void
etna_compile_pass_check_usage(struct etna_compile * c)539 etna_compile_pass_check_usage(struct etna_compile *c)
540 {
541    struct tgsi_parse_context ctx = { };
542    ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
543    assert(status == TGSI_PARSE_OK);
544 
545    for (int idx = 0; idx < c->total_decls; ++idx) {
546       c->decl[idx].active = false;
547       c->decl[idx].first_use = c->decl[idx].last_use = -1;
548    }
549 
550    int inst_idx = 0;
551    while (!tgsi_parse_end_of_tokens(&ctx)) {
552       tgsi_parse_token(&ctx);
553       /* find out max register #s used
554        * For every register mark first and last instruction index where it's
555        * used this allows finding ranges where the temporary can be borrowed
556        * as input and/or output register
557        *
558        * XXX in the case of loops this needs special care, or even be completely
559        * disabled, as
560        * the last usage of a register inside a loop means it can still be used
561        * on next loop
562        * iteration (execution is no longer * chronological). The register can
563        * only be
564        * declared "free" after the loop finishes.
565        *
566        * Same for inputs: the first usage of a register inside a loop doesn't
567        * mean that the register
568        * won't have been overwritten in previous iteration. The register can
569        * only be declared free before the loop
570        * starts.
571        * The proper way would be to do full dominator / post-dominator analysis
572        * (especially with more complicated
573        * control flow such as direct branch instructions) but not for now...
574        */
575       switch (ctx.FullToken.Token.Type) {
576       case TGSI_TOKEN_TYPE_DECLARATION: {
577          /* Declaration: fill in file details */
578          const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
579          struct etna_compile_file *file = &c->file[decl->Declaration.File];
580 
581          for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
582             file->reg[idx].usage_mask = 0; // we'll compute this ourselves
583             file->reg[idx].has_semantic = decl->Declaration.Semantic;
584             file->reg[idx].semantic = decl->Semantic;
585             file->reg[idx].interp = decl->Interp;
586          }
587       } break;
588       case TGSI_TOKEN_TYPE_INSTRUCTION: {
589          /* Instruction: iterate over operands of instruction */
590          const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
591 
592          /* iterate over destination registers */
593          for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
594             struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];
595 
596             if (reg_desc->first_use == -1)
597                reg_desc->first_use = inst_idx;
598 
599             reg_desc->last_use = inst_idx;
600             reg_desc->active = true;
601          }
602 
603          /* iterate over source registers */
604          for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
605             struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];
606 
607             if (reg_desc->first_use == -1)
608                reg_desc->first_use = inst_idx;
609 
610             reg_desc->last_use = inst_idx;
611             reg_desc->active = true;
612             /* accumulate usage mask for register, this is used to determine how
613              * many slots for varyings
614              * should be allocated */
615             reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
616          }
617          inst_idx += 1;
618       } break;
619       default:
620          break;
621       }
622    }
623 
624    tgsi_parse_free(&ctx);
625 }
626 
627 /* assign inputs that need to be assigned to specific registers */
628 static void
assign_special_inputs(struct etna_compile * c)629 assign_special_inputs(struct etna_compile *c)
630 {
631    if (c->info.processor == PIPE_SHADER_FRAGMENT) {
632       /* never assign t0 as it is the position output, start assigning at t1 */
633       c->next_free_native = 1;
634 
635       for (int idx = 0; idx < c->total_decls; ++idx) {
636          struct etna_reg_desc *reg = &c->decl[idx];
637 
638          if (!reg->active)
639             continue;
640 
641          /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
642          if (reg->semantic.Name == TGSI_SEMANTIC_POSITION)
643             reg->native = etna_native_temp(0);
644 
645          /* hardwire TGSI_SEMANTIC_FACE to i0 */
646          if (reg->semantic.Name == TGSI_SEMANTIC_FACE)
647             reg->native = etna_native_internal(0);
648       }
649    }
650 }
651 
652 /* Check that a move instruction does not swizzle any of the components
653  * that it writes.
654  */
655 static bool
etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,const struct tgsi_src_register src)656 etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
657                           const struct tgsi_src_register src)
658 {
659    return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
660           (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
661           (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
662           (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
663 }
664 
665 /* Pass -- optimize outputs
666  * Mesa tends to generate code like this at the end if their shaders
667  *   MOV OUT[1], TEMP[2]
668  *   MOV OUT[0], TEMP[0]
669  *   MOV OUT[2], TEMP[1]
670  * Recognize if
671  * a) there is only a single assignment to an output register and
672  * b) the temporary is not used after that
673  * Also recognize direct assignment of IN to OUT (passthrough)
674  **/
675 static void
etna_compile_pass_optimize_outputs(struct etna_compile * c)676 etna_compile_pass_optimize_outputs(struct etna_compile *c)
677 {
678    struct tgsi_parse_context ctx = { };
679    int inst_idx = 0;
680    ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
681    assert(status == TGSI_PARSE_OK);
682 
683    while (!tgsi_parse_end_of_tokens(&ctx)) {
684       tgsi_parse_token(&ctx);
685 
686       switch (ctx.FullToken.Token.Type) {
687       case TGSI_TOKEN_TYPE_INSTRUCTION: {
688          const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
689 
690          /* iterate over operands */
691          switch (inst->Instruction.Opcode) {
692          case TGSI_OPCODE_MOV: {
693             /* We are only interested in eliminating MOVs which write to
694              * the shader outputs. Test for this early. */
695             if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
696                break;
697             /* Elimination of a MOV must have no visible effect on the
698              * resulting shader: this means the MOV must not swizzle or
699              * saturate, and its source must not have the negate or
700              * absolute modifiers. */
701             if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
702                 inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
703                 inst->Src[0].Register.Absolute)
704                break;
705 
706             uint out_idx = inst->Dst[0].Register.Index;
707             uint in_idx = inst->Src[0].Register.Index;
708             /* assignment of temporary to output --
709              * and the output doesn't yet have a native register assigned
710              * and the last use of the temporary is this instruction
711              * and the MOV does not do a swizzle
712              */
713             if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
714                 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
715                 c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
716                c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
717                   c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
718                /* prevent temp from being re-used for the rest of the shader */
719                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
720                /* mark this MOV instruction as a no-op */
721                c->dead_inst[inst_idx] = true;
722             }
723             /* direct assignment of input to output --
724              * and the input or output doesn't yet have a native register
725              * assigned
726              * and the output is only used in this instruction,
727              * allocate a new register, and associate both input and output to
728              * it
729              * and the MOV does not do a swizzle
730              */
731             if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
732                 !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
733                 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
734                 c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
735                 c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
736                c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
737                   c->file[TGSI_FILE_INPUT].reg[in_idx].native =
738                      alloc_new_native_reg(c);
739                /* mark this MOV instruction as a no-op */
740                c->dead_inst[inst_idx] = true;
741             }
742          } break;
743          default:;
744          }
745          inst_idx += 1;
746       } break;
747       }
748    }
749 
750    tgsi_parse_free(&ctx);
751 }
752 
753 /* Get a temporary to be used within one TGSI instruction.
754  * The first time that this function is called the temporary will be allocated.
755  * Each call to this function will return the same temporary.
756  */
757 static struct etna_native_reg
etna_compile_get_inner_temp(struct etna_compile * c)758 etna_compile_get_inner_temp(struct etna_compile *c)
759 {
760    int inner_temp = c->inner_temps;
761 
762    if (inner_temp < ETNA_MAX_INNER_TEMPS) {
763       if (!c->inner_temp[inner_temp].valid)
764          c->inner_temp[inner_temp] = alloc_new_native_reg(c);
765 
766       /* alloc_new_native_reg() handles lack of registers */
767       c->inner_temps += 1;
768    } else {
769       BUG("Too many inner temporaries (%i) requested in one instruction",
770           inner_temp + 1);
771    }
772 
773    return c->inner_temp[inner_temp];
774 }
775 
776 static struct etna_inst_dst
etna_native_to_dst(struct etna_native_reg native,unsigned comps)777 etna_native_to_dst(struct etna_native_reg native, unsigned comps)
778 {
779    /* Can only assign to temporaries */
780    assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
781 
782    struct etna_inst_dst rv = {
783       .write_mask = comps,
784       .use = 1,
785       .reg = native.id,
786    };
787 
788    return rv;
789 }
790 
791 static struct etna_inst_src
etna_native_to_src(struct etna_native_reg native,uint32_t swizzle)792 etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
793 {
794    assert(native.valid && !native.is_tex);
795 
796    struct etna_inst_src rv = {
797       .use = 1,
798       .swiz = swizzle,
799       .rgroup = native.rgroup,
800       .reg = native.id,
801       .amode = INST_AMODE_DIRECT,
802    };
803 
804    return rv;
805 }
806 
807 static inline struct etna_inst_src
negate(struct etna_inst_src src)808 negate(struct etna_inst_src src)
809 {
810    src.neg = !src.neg;
811 
812    return src;
813 }
814 
815 static inline struct etna_inst_src
absolute(struct etna_inst_src src)816 absolute(struct etna_inst_src src)
817 {
818    src.abs = 1;
819 
820    return src;
821 }
822 
823 static inline struct etna_inst_src
swizzle(struct etna_inst_src src,unsigned swizzle)824 swizzle(struct etna_inst_src src, unsigned swizzle)
825 {
826    src.swiz = inst_swiz_compose(src.swiz, swizzle);
827 
828    return src;
829 }
830 
831 /* Emit instruction and append it to program */
832 static void
emit_inst(struct etna_compile * c,struct etna_inst * inst)833 emit_inst(struct etna_compile *c, struct etna_inst *inst)
834 {
835    assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
836 
837    /* Check for uniform conflicts (each instruction can only access one
838     * uniform),
839     * if detected, use an intermediate temporary */
840    unsigned uni_rgroup = -1;
841    unsigned uni_reg = -1;
842 
843    for (int src = 0; src < ETNA_NUM_SRC; ++src) {
844       if (inst->src[src].rgroup == INST_RGROUP_INTERNAL &&
845           c->info.processor == PIPE_SHADER_FRAGMENT &&
846           c->key->front_ccw) {
847          struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
848 
849          /*
850           * Set temporary register to 0.0 or 1.0 based on the gl_FrontFacing
851           * configuration (CW or CCW).
852           */
853          etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
854             .opcode = INST_OPCODE_SET,
855             .cond = INST_CONDITION_NE,
856             .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
857                                                   INST_COMPS_Z | INST_COMPS_W),
858             .src[0] = inst->src[src],
859             .src[1] = alloc_imm_f32(c, 1.0f)
860          });
861          c->inst_ptr++;
862 
863          /* Modify instruction to use temp register instead of uniform */
864          inst->src[src].use = 1;
865          inst->src[src].rgroup = INST_RGROUP_TEMP;
866          inst->src[src].reg = inner_temp.id;
867          inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
868          inst->src[src].neg = 0; /* negation happens on MOV */
869          inst->src[src].abs = 0; /* abs happens on MOV */
870          inst->src[src].amode = 0; /* amode effects happen on MOV */
871       } else if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
872          if (uni_reg == -1) { /* first unique uniform used */
873             uni_rgroup = inst->src[src].rgroup;
874             uni_reg = inst->src[src].reg;
875          } else { /* second or later; check that it is a re-use */
876             if (uni_rgroup != inst->src[src].rgroup ||
877                 uni_reg != inst->src[src].reg) {
878                DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
879                                              "accesses different uniforms, "
880                                              "need to generate extra MOV");
881                struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
882 
883                /* Generate move instruction to temporary */
884                etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
885                   .opcode = INST_OPCODE_MOV,
886                   .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
887                                                         INST_COMPS_Z | INST_COMPS_W),
888                   .src[2] = inst->src[src]
889                });
890 
891                c->inst_ptr++;
892 
893                /* Modify instruction to use temp register instead of uniform */
894                inst->src[src].use = 1;
895                inst->src[src].rgroup = INST_RGROUP_TEMP;
896                inst->src[src].reg = inner_temp.id;
897                inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
898                inst->src[src].neg = 0; /* negation happens on MOV */
899                inst->src[src].abs = 0; /* abs happens on MOV */
900                inst->src[src].amode = 0; /* amode effects happen on MOV */
901             }
902          }
903       }
904    }
905 
906    /* Finally assemble the actual instruction */
907    etna_assemble(&c->code[c->inst_ptr * 4], inst);
908    c->inst_ptr++;
909 }
910 
911 static unsigned int
etna_amode(struct tgsi_ind_register indirect)912 etna_amode(struct tgsi_ind_register indirect)
913 {
914    assert(indirect.File == TGSI_FILE_ADDRESS);
915    assert(indirect.Index == 0);
916 
917    switch (indirect.Swizzle) {
918    case TGSI_SWIZZLE_X:
919       return INST_AMODE_ADD_A_X;
920    case TGSI_SWIZZLE_Y:
921       return INST_AMODE_ADD_A_Y;
922    case TGSI_SWIZZLE_Z:
923       return INST_AMODE_ADD_A_Z;
924    case TGSI_SWIZZLE_W:
925       return INST_AMODE_ADD_A_W;
926    default:
927       assert(!"Invalid swizzle");
928    }
929 
930    unreachable("bad swizzle");
931 }
932 
933 /* convert destination operand */
934 static struct etna_inst_dst
convert_dst(struct etna_compile * c,const struct tgsi_full_dst_register * in)935 convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
936 {
937    struct etna_inst_dst rv = {
938       /// XXX .amode
939       .write_mask = in->Register.WriteMask,
940    };
941 
942    if (in->Register.File == TGSI_FILE_ADDRESS) {
943       assert(in->Register.Index == 0);
944       rv.reg = in->Register.Index;
945       rv.use = 0;
946    } else {
947       rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
948                               in->Register.WriteMask);
949    }
950 
951    if (in->Register.Indirect)
952       rv.amode = etna_amode(in->Indirect);
953 
954    return rv;
955 }
956 
957 /* convert texture operand */
958 static struct etna_inst_tex
convert_tex(struct etna_compile * c,const struct tgsi_full_src_register * in,const struct tgsi_instruction_texture * tex)959 convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
960             const struct tgsi_instruction_texture *tex)
961 {
962    struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
963    struct etna_inst_tex rv = {
964       // XXX .amode (to allow for an array of samplers?)
965       .swiz = INST_SWIZ_IDENTITY
966    };
967 
968    assert(native_reg.is_tex && native_reg.valid);
969    rv.id = native_reg.id;
970 
971    return rv;
972 }
973 
974 /* convert source operand */
975 static struct etna_inst_src
etna_create_src(const struct tgsi_full_src_register * tgsi,const struct etna_native_reg * native)976 etna_create_src(const struct tgsi_full_src_register *tgsi,
977                 const struct etna_native_reg *native)
978 {
979    const struct tgsi_src_register *reg = &tgsi->Register;
980    struct etna_inst_src rv = {
981       .use = 1,
982       .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
983       .neg = reg->Negate,
984       .abs = reg->Absolute,
985       .rgroup = native->rgroup,
986       .reg = native->id,
987       .amode = INST_AMODE_DIRECT,
988    };
989 
990    assert(native->valid && !native->is_tex);
991 
992    if (reg->Indirect)
993       rv.amode = etna_amode(tgsi->Indirect);
994 
995    return rv;
996 }
997 
998 static struct etna_inst_src
etna_mov_src_to_temp(struct etna_compile * c,struct etna_inst_src src,struct etna_native_reg temp)999 etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
1000                      struct etna_native_reg temp)
1001 {
1002    struct etna_inst mov = { };
1003 
1004    mov.opcode = INST_OPCODE_MOV;
1005    mov.sat = 0;
1006    mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1007                                       INST_COMPS_Z | INST_COMPS_W);
1008    mov.src[2] = src;
1009    emit_inst(c, &mov);
1010 
1011    src.swiz = INST_SWIZ_IDENTITY;
1012    src.neg = src.abs = 0;
1013    src.rgroup = temp.rgroup;
1014    src.reg = temp.id;
1015 
1016    return src;
1017 }
1018 
1019 static struct etna_inst_src
etna_mov_src(struct etna_compile * c,struct etna_inst_src src)1020 etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
1021 {
1022    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1023 
1024    return etna_mov_src_to_temp(c, src, temp);
1025 }
1026 
1027 static bool
etna_src_uniforms_conflict(struct etna_inst_src a,struct etna_inst_src b)1028 etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
1029 {
1030    return etna_rgroup_is_uniform(a.rgroup) &&
1031           etna_rgroup_is_uniform(b.rgroup) &&
1032           (a.rgroup != b.rgroup || a.reg != b.reg);
1033 }
1034 
1035 /* create a new label */
1036 static unsigned int
alloc_new_label(struct etna_compile * c)1037 alloc_new_label(struct etna_compile *c)
1038 {
1039    struct etna_compile_label label = {
1040       .inst_idx = -1, /* start by point to no specific instruction */
1041    };
1042 
1043    array_insert(c->labels, label);
1044 
1045    return c->labels_count - 1;
1046 }
1047 
/* Place label at the current instruction pointer: the label's inst_idx is
 * set to c->inst_ptr, i.e. the index the next emitted instruction will get. */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   label->inst_idx = c->inst_ptr;
}
1054 
/* Mark label use at the current instruction.
 * The target of the label will be filled in in the marked instruction's
 * src2.imm slot as soon as the value becomes known.
 *
 * Records lbl_idx in c->lbl_usage[] at index c->inst_ptr, so this must be
 * called right before the instruction that references the label is emitted.
 */
static void
label_mark_use(struct etna_compile *c, int lbl_idx)
{
   /* inst_ptr is used as an index below and must be a valid slot */
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   c->lbl_usage[c->inst_ptr] = lbl_idx;
}
1066 
1067 /* walk the frame stack and return first frame with matching type */
1068 static struct etna_compile_frame *
find_frame(struct etna_compile * c,enum etna_compile_frame_type type)1069 find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1070 {
1071    for (int sp = c->frame_sp; sp >= 0; sp--)
1072       if (c->frame_stack[sp].type == type)
1073          return &c->frame_stack[sp];
1074 
1075    assert(0);
1076    return NULL;
1077 }
1078 
/* Describes how one TGSI opcode is translated. */
struct instr_translater {
   /* Translation callback.  Receives this descriptor, the compile state,
    * the TGSI instruction and its already converted source operands. */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   /* NOTE(review): presumably the TGSI opcode this entry handles; not read
    * by any of the callbacks visible here -- confirm against the table. */
   unsigned tgsi_opc;
   /* Native opcode the callbacks put into the emitted instruction
    * (see trans_instr, trans_deriv). */
   uint8_t opc;

   /* tgsi src -> etna src swizzle: src[i] is the native source slot that
    * TGSI source i is stored into (trans_instr asserts it is not -1). */
   int src[3];

   /* Condition code copied into the emitted instruction
    * (see trans_instr, trans_min_max). */
   unsigned cond;
};
1091 
1092 static void
trans_instr(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1093 trans_instr(const struct instr_translater *t, struct etna_compile *c,
1094             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1095 {
1096    const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
1097    struct etna_inst instr = { };
1098 
1099    instr.opcode = t->opc;
1100    instr.cond = t->cond;
1101    instr.sat = inst->Instruction.Saturate;
1102 
1103    assert(info->num_dst <= 1);
1104    if (info->num_dst)
1105       instr.dst = convert_dst(c, &inst->Dst[0]);
1106 
1107    assert(info->num_src <= ETNA_NUM_SRC);
1108 
1109    for (unsigned i = 0; i < info->num_src; i++) {
1110       int swizzle = t->src[i];
1111 
1112       assert(swizzle != -1);
1113       instr.src[swizzle] = src[i];
1114    }
1115 
1116    emit_inst(c, &instr);
1117 }
1118 
1119 static void
trans_min_max(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1120 trans_min_max(const struct instr_translater *t, struct etna_compile *c,
1121               const struct tgsi_full_instruction *inst,
1122               struct etna_inst_src *src)
1123 {
1124    emit_inst(c, &(struct etna_inst) {
1125       .opcode = INST_OPCODE_SELECT,
1126        .cond = t->cond,
1127        .sat = inst->Instruction.Saturate,
1128        .dst = convert_dst(c, &inst->Dst[0]),
1129        .src[0] = src[0],
1130        .src[1] = src[1],
1131        .src[2] = src[0],
1132     });
1133 }
1134 
1135 static void
trans_if(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1136 trans_if(const struct instr_translater *t, struct etna_compile *c,
1137          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1138 {
1139    struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1140    struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);
1141 
1142    /* push IF to stack */
1143    f->type = ETNA_COMPILE_FRAME_IF;
1144    /* create "else" label */
1145    f->lbl_else_idx = alloc_new_label(c);
1146    f->lbl_endif_idx = -1;
1147 
1148    /* We need to avoid the emit_inst() below becoming two instructions */
1149    if (etna_src_uniforms_conflict(src[0], imm_0))
1150       src[0] = etna_mov_src(c, src[0]);
1151 
1152    /* mark position in instruction stream of label reference so that it can be
1153     * filled in in next pass */
1154    label_mark_use(c, f->lbl_else_idx);
1155 
1156    /* create conditional branch to label if src0 EQ 0 */
1157    emit_inst(c, &(struct etna_inst){
1158       .opcode = INST_OPCODE_BRANCH,
1159       .cond = INST_CONDITION_EQ,
1160       .src[0] = src[0],
1161       .src[1] = imm_0,
1162     /* imm is filled in later */
1163    });
1164 }
1165 
1166 static void
trans_else(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1167 trans_else(const struct instr_translater *t, struct etna_compile *c,
1168            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1169 {
1170    assert(c->frame_sp > 0);
1171    struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
1172    assert(f->type == ETNA_COMPILE_FRAME_IF);
1173 
1174    /* create "endif" label, and branch to endif label */
1175    f->lbl_endif_idx = alloc_new_label(c);
1176    label_mark_use(c, f->lbl_endif_idx);
1177    emit_inst(c, &(struct etna_inst) {
1178       .opcode = INST_OPCODE_BRANCH,
1179       .cond = INST_CONDITION_TRUE,
1180       /* imm is filled in later */
1181    });
1182 
1183    /* mark "else" label at this position in instruction stream */
1184    label_place(c, &c->labels[f->lbl_else_idx]);
1185 }
1186 
1187 static void
trans_endif(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1188 trans_endif(const struct instr_translater *t, struct etna_compile *c,
1189             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1190 {
1191    assert(c->frame_sp > 0);
1192    struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1193    assert(f->type == ETNA_COMPILE_FRAME_IF);
1194 
1195    /* assign "endif" or "else" (if no ELSE) label to current position in
1196     * instruction stream, pop IF */
1197    if (f->lbl_endif_idx != -1)
1198       label_place(c, &c->labels[f->lbl_endif_idx]);
1199    else
1200       label_place(c, &c->labels[f->lbl_else_idx]);
1201 }
1202 
1203 static void
trans_loop_bgn(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1204 trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1205                const struct tgsi_full_instruction *inst,
1206                struct etna_inst_src *src)
1207 {
1208    struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1209 
1210    /* push LOOP to stack */
1211    f->type = ETNA_COMPILE_FRAME_LOOP;
1212    f->lbl_loop_bgn_idx = alloc_new_label(c);
1213    f->lbl_loop_end_idx = alloc_new_label(c);
1214 
1215    label_place(c, &c->labels[f->lbl_loop_bgn_idx]);
1216 
1217    c->num_loops++;
1218 }
1219 
1220 static void
trans_loop_end(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1221 trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
1222                const struct tgsi_full_instruction *inst,
1223                struct etna_inst_src *src)
1224 {
1225    assert(c->frame_sp > 0);
1226    struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1227    assert(f->type == ETNA_COMPILE_FRAME_LOOP);
1228 
1229    /* mark position in instruction stream of label reference so that it can be
1230     * filled in in next pass */
1231    label_mark_use(c, f->lbl_loop_bgn_idx);
1232 
1233    /* create branch to loop_bgn label */
1234    emit_inst(c, &(struct etna_inst) {
1235       .opcode = INST_OPCODE_BRANCH,
1236       .cond = INST_CONDITION_TRUE,
1237       .src[0] = src[0],
1238       /* imm is filled in later */
1239    });
1240 
1241    label_place(c, &c->labels[f->lbl_loop_end_idx]);
1242 }
1243 
1244 static void
trans_brk(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1245 trans_brk(const struct instr_translater *t, struct etna_compile *c,
1246           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1247 {
1248    assert(c->frame_sp > 0);
1249    struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1250 
1251    /* mark position in instruction stream of label reference so that it can be
1252     * filled in in next pass */
1253    label_mark_use(c, f->lbl_loop_end_idx);
1254 
1255    /* create branch to loop_end label */
1256    emit_inst(c, &(struct etna_inst) {
1257       .opcode = INST_OPCODE_BRANCH,
1258       .cond = INST_CONDITION_TRUE,
1259       .src[0] = src[0],
1260       /* imm is filled in later */
1261    });
1262 }
1263 
1264 static void
trans_cont(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1265 trans_cont(const struct instr_translater *t, struct etna_compile *c,
1266            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1267 {
1268    assert(c->frame_sp > 0);
1269    struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1270 
1271    /* mark position in instruction stream of label reference so that it can be
1272     * filled in in next pass */
1273    label_mark_use(c, f->lbl_loop_bgn_idx);
1274 
1275    /* create branch to loop_end label */
1276    emit_inst(c, &(struct etna_inst) {
1277       .opcode = INST_OPCODE_BRANCH,
1278       .cond = INST_CONDITION_TRUE,
1279       .src[0] = src[0],
1280       /* imm is filled in later */
1281    });
1282 }
1283 
1284 static void
trans_deriv(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1285 trans_deriv(const struct instr_translater *t, struct etna_compile *c,
1286             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1287 {
1288    emit_inst(c, &(struct etna_inst) {
1289       .opcode = t->opc,
1290       .sat = inst->Instruction.Saturate,
1291       .dst = convert_dst(c, &inst->Dst[0]),
1292       .src[0] = src[0],
1293       .src[2] = src[0],
1294    });
1295 }
1296 
1297 static void
trans_arl(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1298 trans_arl(const struct instr_translater *t, struct etna_compile *c,
1299           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1300 {
1301    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1302    struct etna_inst arl = { };
1303    struct etna_inst_dst dst;
1304 
1305    dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
1306                                   INST_COMPS_W);
1307 
1308    if (c->specs->has_sign_floor_ceil) {
1309       struct etna_inst floor = { };
1310 
1311       floor.opcode = INST_OPCODE_FLOOR;
1312       floor.src[2] = src[0];
1313       floor.dst = dst;
1314 
1315       emit_inst(c, &floor);
1316    } else {
1317       struct etna_inst floor[2] = { };
1318 
1319       floor[0].opcode = INST_OPCODE_FRC;
1320       floor[0].sat = inst->Instruction.Saturate;
1321       floor[0].dst = dst;
1322       floor[0].src[2] = src[0];
1323 
1324       floor[1].opcode = INST_OPCODE_ADD;
1325       floor[1].sat = inst->Instruction.Saturate;
1326       floor[1].dst = dst;
1327       floor[1].src[0] = src[0];
1328       floor[1].src[2].use = 1;
1329       floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
1330       floor[1].src[2].neg = 1;
1331       floor[1].src[2].rgroup = temp.rgroup;
1332       floor[1].src[2].reg = temp.id;
1333 
1334       emit_inst(c, &floor[0]);
1335       emit_inst(c, &floor[1]);
1336    }
1337 
1338    arl.opcode = INST_OPCODE_MOVAR;
1339    arl.sat = inst->Instruction.Saturate;
1340    arl.dst = convert_dst(c, &inst->Dst[0]);
1341    arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1342 
1343    emit_inst(c, &arl);
1344 }
1345 
1346 static void
trans_lrp(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1347 trans_lrp(const struct instr_translater *t, struct etna_compile *c,
1348           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1349 {
1350    /* dst = src0 * src1 + (1 - src0) * src2
1351     *     => src0 * src1 - (src0 - 1) * src2
1352     *     => src0 * src1 - (src0 * src2 - src2)
1353     * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1354     * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1355     */
1356    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1357    if (etna_src_uniforms_conflict(src[0], src[1]) ||
1358        etna_src_uniforms_conflict(src[0], src[2])) {
1359       src[0] = etna_mov_src(c, src[0]);
1360    }
1361 
1362    struct etna_inst mad[2] = { };
1363    mad[0].opcode = INST_OPCODE_MAD;
1364    mad[0].sat = 0;
1365    mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1366                                          INST_COMPS_Z | INST_COMPS_W);
1367    mad[0].src[0] = src[0];
1368    mad[0].src[1] = src[2];
1369    mad[0].src[2] = negate(src[2]);
1370    mad[1].opcode = INST_OPCODE_MAD;
1371    mad[1].sat = inst->Instruction.Saturate;
1372    mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1373    mad[1].src[1] = src[1];
1374    mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1375 
1376    emit_inst(c, &mad[0]);
1377    emit_inst(c, &mad[1]);
1378 }
1379 
/* LIT: expand the TGSI lighting opcode into the sequence listed at the top
 * of the body, working through one inner temporary.
 *
 * The y and w inputs are prepared first (src_y, src_w).  When src0 is a
 * uniform its values are read back with get_imm_u32() and the clamping is
 * done at compile time; otherwise SELECT instructions are emitted that write
 * the prepared values into tmp.y / tmp.w.
 */
static void
trans_lit(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* SELECT.LT tmp._y__, 0, src.yyyy, 0
    *  - can be eliminated if src.y is a uniform and >= 0
    * SELECT.GT tmp.___w, 128, src.wwww, 128
    * SELECT.LT tmp.___w, -128, tmp.wwww, -128
    *  - can be eliminated if src.w is a uniform and fits clamp
    * LOG tmp.x, void, void, tmp.yyyy
    * MUL tmp.x, tmp.xxxx, tmp.wwww, void
    * LITP dst, undef, src.xxxx, tmp.xxxx
    */
   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
   struct etna_inst_src src_y = { };

   /* y input: run-time SELECT against 0.0 for non-uniform sources; for a
    * uniform, substitute 0.0 at compile time when the value is negative */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_LT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
      src_y = alloc_imm_f32(c, 0.0);
   else
      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));

   struct etna_inst_src src_w = { };

   /* w input: two run-time SELECTs against 128 and -128 for non-uniform
    * sources; for a uniform, clamp the value to [-128, 128] at compile time */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_GT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
      emit_inst(c, &ins);
      /* second SELECT reuses the same instruction: condition LT, the 128
       * immediates negated, and tmp.w (result of the first) as input */
      ins.cond = INST_CONDITION_LT;
      ins.src[0].neg = !ins.src[0].neg;
      ins.src[2].neg = !ins.src[2].neg;
      ins.src[1] = src_w;
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
      src_w = alloc_imm_f32(c, -128.);
   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
      src_w = alloc_imm_f32(c, 128.);
   else
      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));

   if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
      /* this LOG variant writes tmp.x and tmp.y, which are then multiplied
       * together into tmp.x */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src_y,
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
         .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      /* only element 0 of this array is used */
      struct etna_inst ins[3] = { };
      ins[0].opcode = INST_OPCODE_LOG;
      ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
      ins[0].src[2] = src_y;

      emit_inst(c, &ins[0]);
   }
   /* tmp.x = tmp.x * src_w */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   /* final LITP writes the real destination from src.x and tmp.x */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}
1472 
1473 static void
trans_ssg(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1474 trans_ssg(const struct instr_translater *t, struct etna_compile *c,
1475           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1476 {
1477    if (c->specs->has_sign_floor_ceil) {
1478       emit_inst(c, &(struct etna_inst){
1479          .opcode = INST_OPCODE_SIGN,
1480          .sat = inst->Instruction.Saturate,
1481          .dst = convert_dst(c, &inst->Dst[0]),
1482          .src[2] = src[0],
1483       });
1484    } else {
1485       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1486       struct etna_inst ins[2] = { };
1487 
1488       ins[0].opcode = INST_OPCODE_SET;
1489       ins[0].cond = INST_CONDITION_NZ;
1490       ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1491                                             INST_COMPS_Z | INST_COMPS_W);
1492       ins[0].src[0] = src[0];
1493 
1494       ins[1].opcode = INST_OPCODE_SELECT;
1495       ins[1].cond = INST_CONDITION_LZ;
1496       ins[1].sat = inst->Instruction.Saturate;
1497       ins[1].dst = convert_dst(c, &inst->Dst[0]);
1498       ins[1].src[0] = src[0];
1499       ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1500       ins[1].src[1] = negate(ins[1].src[2]);
1501 
1502       emit_inst(c, &ins[0]);
1503       emit_inst(c, &ins[1]);
1504    }
1505 }
1506 
1507 static void
trans_trig(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1508 trans_trig(const struct instr_translater *t, struct etna_compile *c,
1509            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1510 {
1511    if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
1512       /* On newer chips alternative SIN/COS instructions are implemented,
1513        * which:
1514        * - Need their input scaled by 1/pi instead of 2/pi
1515        * - Output an x and y component, which need to be multiplied to
1516        *   get the result
1517        */
1518       struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
1519       emit_inst(c, &(struct etna_inst) {
1520          .opcode = INST_OPCODE_MUL,
1521          .sat = 0,
1522          .dst = etna_native_to_dst(temp, INST_COMPS_Z),
1523          .src[0] = src[0], /* any swizzling happens here */
1524          .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
1525       });
1526       emit_inst(c, &(struct etna_inst) {
1527          .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
1528                     ? INST_OPCODE_COS
1529                     : INST_OPCODE_SIN,
1530          .sat = 0,
1531          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1532          .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
1533          .tex = { .amode=1 }, /* Unknown bit needs to be set */
1534       });
1535       emit_inst(c, &(struct etna_inst) {
1536          .opcode = INST_OPCODE_MUL,
1537          .sat = inst->Instruction.Saturate,
1538          .dst = convert_dst(c, &inst->Dst[0]),
1539          .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1540          .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1541       });
1542 
1543    } else if (c->specs->has_sin_cos_sqrt) {
1544       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1545       /* add divide by PI/2, using a temp register. GC2000
1546        * fails with src==dst for the trig instruction. */
1547       emit_inst(c, &(struct etna_inst) {
1548          .opcode = INST_OPCODE_MUL,
1549          .sat = 0,
1550          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1551                                          INST_COMPS_Z | INST_COMPS_W),
1552          .src[0] = src[0], /* any swizzling happens here */
1553          .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
1554       });
1555       emit_inst(c, &(struct etna_inst) {
1556          .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
1557                     ? INST_OPCODE_COS
1558                     : INST_OPCODE_SIN,
1559          .sat = inst->Instruction.Saturate,
1560          .dst = convert_dst(c, &inst->Dst[0]),
1561          .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
1562       });
1563    } else {
1564       /* Implement Nick's fast sine/cosine. Taken from:
1565        * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
1566        * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
1567        *  MAD t.x_zw, src.xxxx, A, B
1568        *  FRC t.x_z_, void, void, t.xwzw
1569        *  MAD t.x_z_, t.xwzw, 2, -1
1570        *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
1571        *  DP3 t.x_z_, t.zyww, C, void         (for sin)
1572        *  DP3 t.__z_, t.zyww, C, void         (for scs)
1573        *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
1574        *  DP3 t.x_z_, t.xyww, C, void         (for cos)
1575        *  DP3 t.x___, t.xyww, C, void         (for scs)
1576        *  MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
1577        *  MAD dst, t.ywyw, .2225, t.xzxz
1578        */
1579       struct etna_inst *p, ins[9] = { };
1580       struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
1581       struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
1582       struct etna_inst_src sincos[3], in = src[0];
1583       sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
1584       sincos[1] = etna_imm_vec4f(c, sincos_const[1]);
1585 
1586       /* A uniform source will cause the inner temp limit to
1587        * be exceeded.  Explicitly deal with that scenario.
1588        */
1589       if (etna_rgroup_is_uniform(src[0].rgroup)) {
1590          struct etna_inst ins = { };
1591          ins.opcode = INST_OPCODE_MOV;
1592          ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
1593          ins.src[2] = in;
1594          emit_inst(c, &ins);
1595          in = t0s;
1596       }
1597 
1598       ins[0].opcode = INST_OPCODE_MAD;
1599       ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
1600       ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
1601       ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
1602       ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */
1603 
1604       ins[1].opcode = INST_OPCODE_FRC;
1605       ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1606       ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));
1607 
1608       ins[2].opcode = INST_OPCODE_MAD;
1609       ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1610       ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
1611       ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
1612       ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */
1613 
1614       unsigned mul_swiz, dp3_swiz;
1615       if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
1616          mul_swiz = SWIZZLE(W, Z, W, W);
1617          dp3_swiz = SWIZZLE(Z, Y, W, W);
1618       } else {
1619          mul_swiz = SWIZZLE(W, X, W, W);
1620          dp3_swiz = SWIZZLE(X, Y, W, W);
1621       }
1622 
1623       ins[3].opcode = INST_OPCODE_MUL;
1624       ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
1625       ins[3].src[0] = swizzle(t0s, mul_swiz);
1626       ins[3].src[1] = absolute(ins[3].src[0]);
1627 
1628       ins[4].opcode = INST_OPCODE_DP3;
1629       ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1630       ins[4].src[0] = swizzle(t0s, dp3_swiz);
1631       ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
1632 
1633       p = &ins[5];
1634       p->opcode = INST_OPCODE_MAD;
1635       p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
1636       p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
1637       p->src[1] = absolute(p->src[0]);
1638       p->src[2] = negate(p->src[0]);
1639 
1640       p++;
1641       p->opcode = INST_OPCODE_MAD;
1642       p->sat = inst->Instruction.Saturate;
1643       p->dst = convert_dst(c, &inst->Dst[0]),
1644       p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
1645       p->src[1] = alloc_imm_f32(c, 0.2225);
1646       p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));
1647 
1648       for (int i = 0; &ins[i] <= p; i++)
1649          emit_inst(c, &ins[i]);
1650    }
1651 }
1652 
1653 static void
trans_lg2(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1654 trans_lg2(const struct instr_translater *t, struct etna_compile *c,
1655             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1656 {
1657    if (c->specs->has_new_transcendentals) {
1658       /* On newer chips alternative LOG instruction is implemented,
1659        * which outputs an x and y component, which need to be multiplied to
1660        * get the result.
1661        */
1662       struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
1663       emit_inst(c, &(struct etna_inst) {
1664          .opcode = INST_OPCODE_LOG,
1665          .sat = 0,
1666          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1667          .src[2] = src[0],
1668          .tex = { .amode=1 }, /* Unknown bit needs to be set */
1669       });
1670       emit_inst(c, &(struct etna_inst) {
1671          .opcode = INST_OPCODE_MUL,
1672          .sat = inst->Instruction.Saturate,
1673          .dst = convert_dst(c, &inst->Dst[0]),
1674          .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1675          .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1676       });
1677    } else {
1678       emit_inst(c, &(struct etna_inst) {
1679          .opcode = INST_OPCODE_LOG,
1680          .sat = inst->Instruction.Saturate,
1681          .dst = convert_dst(c, &inst->Dst[0]),
1682          .src[2] = src[0],
1683       });
1684    }
1685 }
1686 
1687 static void
trans_sampler(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1688 trans_sampler(const struct instr_translater *t, struct etna_compile *c,
1689               const struct tgsi_full_instruction *inst,
1690               struct etna_inst_src *src)
1691 {
1692    /* There is no native support for GL texture rectangle coordinates, so
1693     * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
1694    if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
1695       uint32_t unit = inst->Src[1].Register.Index;
1696       struct etna_inst ins[2] = { };
1697       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1698 
1699       ins[0].opcode = INST_OPCODE_MUL;
1700       ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
1701       ins[0].src[0] = src[0];
1702       ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);
1703 
1704       ins[1].opcode = INST_OPCODE_MUL;
1705       ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
1706       ins[1].src[0] = src[0];
1707       ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);
1708 
1709       emit_inst(c, &ins[0]);
1710       emit_inst(c, &ins[1]);
1711 
1712       src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
1713    }
1714 
1715    switch (inst->Instruction.Opcode) {
1716    case TGSI_OPCODE_TEX:
1717       emit_inst(c, &(struct etna_inst) {
1718          .opcode = INST_OPCODE_TEXLD,
1719          .sat = 0,
1720          .dst = convert_dst(c, &inst->Dst[0]),
1721          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1722          .src[0] = src[0],
1723       });
1724       break;
1725 
1726    case TGSI_OPCODE_TXB:
1727       emit_inst(c, &(struct etna_inst) {
1728          .opcode = INST_OPCODE_TEXLDB,
1729          .sat = 0,
1730          .dst = convert_dst(c, &inst->Dst[0]),
1731          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1732          .src[0] = src[0],
1733       });
1734       break;
1735 
1736    case TGSI_OPCODE_TXL:
1737       emit_inst(c, &(struct etna_inst) {
1738          .opcode = INST_OPCODE_TEXLDL,
1739          .sat = 0,
1740          .dst = convert_dst(c, &inst->Dst[0]),
1741          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1742          .src[0] = src[0],
1743       });
1744       break;
1745 
1746    case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
1747       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1748 
1749       emit_inst(c, &(struct etna_inst) {
1750          .opcode = INST_OPCODE_RCP,
1751          .sat = 0,
1752          .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
1753          .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
1754       });
1755       emit_inst(c, &(struct etna_inst) {
1756          .opcode = INST_OPCODE_MUL,
1757          .sat = 0,
1758          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1759                                          INST_COMPS_Z), /* tmp.xyz */
1760          .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
1761          .src[1] = src[0], /* src.xyzw */
1762       });
1763       emit_inst(c, &(struct etna_inst) {
1764          .opcode = INST_OPCODE_TEXLD,
1765          .sat = 0,
1766          .dst = convert_dst(c, &inst->Dst[0]),
1767          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1768          .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
1769       });
1770    } break;
1771 
1772    default:
1773       BUG("Unhandled instruction %s",
1774           tgsi_get_opcode_name(inst->Instruction.Opcode));
1775       assert(0);
1776       break;
1777    }
1778 }
1779 
/* Translator for TGSI opcodes that produce no native instruction. */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* Intentionally empty; the casts only mark the parameters as unused. */
   (void)t;
   (void)c;
   (void)inst;
   (void)src;
}
1786 
1787 static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
1788 #define INSTR(n, f, ...) \
1789    [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}
1790 
1791    INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
1792    INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
1793    INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
1794    INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
1795    INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
1796    INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
1797    INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
1798    INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
1799    INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
1800    INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
1801    INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
1802    INSTR(LG2, trans_lg2),
1803    INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
1804    INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
1805    INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
1806    INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
1807    INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),
1808 
1809    INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
1810    INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),
1811 
1812    INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
1813    INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),
1814 
1815    INSTR(IF, trans_if),
1816    INSTR(ELSE, trans_else),
1817    INSTR(ENDIF, trans_endif),
1818 
1819    INSTR(BGNLOOP, trans_loop_bgn),
1820    INSTR(ENDLOOP, trans_loop_end),
1821    INSTR(BRK, trans_brk),
1822    INSTR(CONT, trans_cont),
1823 
1824    INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
1825    INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),
1826 
1827    INSTR(ARL, trans_arl),
1828    INSTR(LRP, trans_lrp),
1829    INSTR(LIT, trans_lit),
1830    INSTR(SSG, trans_ssg),
1831 
1832    INSTR(SIN, trans_trig),
1833    INSTR(COS, trans_trig),
1834 
1835    INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
1836    INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
1837    INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
1838    INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
1839    INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
1840    INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),
1841 
1842    INSTR(TEX, trans_sampler),
1843    INSTR(TXB, trans_sampler),
1844    INSTR(TXL, trans_sampler),
1845    INSTR(TXP, trans_sampler),
1846 
1847    INSTR(NOP, trans_dummy),
1848    INSTR(END, trans_dummy),
1849 };
1850 
1851 /* Pass -- compile instructions */
1852 static void
etna_compile_pass_generate_code(struct etna_compile * c)1853 etna_compile_pass_generate_code(struct etna_compile *c)
1854 {
1855    struct tgsi_parse_context ctx = { };
1856    ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
1857    assert(status == TGSI_PARSE_OK);
1858 
1859    int inst_idx = 0;
1860    while (!tgsi_parse_end_of_tokens(&ctx)) {
1861       const struct tgsi_full_instruction *inst = 0;
1862 
1863       /* No inner temps used yet for this instruction, clear counter */
1864       c->inner_temps = 0;
1865 
1866       tgsi_parse_token(&ctx);
1867 
1868       switch (ctx.FullToken.Token.Type) {
1869       case TGSI_TOKEN_TYPE_INSTRUCTION:
1870          /* iterate over operands */
1871          inst = &ctx.FullToken.FullInstruction;
1872          if (c->dead_inst[inst_idx]) { /* skip dead instructions */
1873             inst_idx++;
1874             continue;
1875          }
1876 
1877          /* Lookup the TGSI information and generate the source arguments */
1878          struct etna_inst_src src[ETNA_NUM_SRC];
1879          memset(src, 0, sizeof(src));
1880 
1881          const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
1882 
1883          for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
1884             const struct tgsi_full_src_register *reg = &inst->Src[i];
1885             const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;
1886 
1887             if (!n->valid || n->is_tex)
1888                continue;
1889 
1890             src[i] = etna_create_src(reg, n);
1891          }
1892 
1893          const unsigned opc = inst->Instruction.Opcode;
1894          const struct instr_translater *t = &translaters[opc];
1895 
1896          if (t->fxn) {
1897             t->fxn(t, c, inst, src);
1898 
1899             inst_idx += 1;
1900          } else {
1901             BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
1902             assert(0);
1903          }
1904          break;
1905       }
1906    }
1907    tgsi_parse_free(&ctx);
1908 }
1909 
1910 /* Look up register by semantic */
1911 static struct etna_reg_desc *
find_decl_by_semantic(struct etna_compile * c,uint file,uint name,uint index)1912 find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1913 {
1914    for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1915       struct etna_reg_desc *reg = &c->file[file].reg[idx];
1916 
1917       if (reg->semantic.Name == name && reg->semantic.Index == index)
1918          return reg;
1919    }
1920 
1921    return NULL; /* not found */
1922 }
1923 
1924 /** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
1925  * - this is a vertex shader
1926  * - and this is an older GPU
1927  */
1928 static void
etna_compile_add_z_div_if_needed(struct etna_compile * c)1929 etna_compile_add_z_div_if_needed(struct etna_compile *c)
1930 {
1931    if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
1932       /* find position out */
1933       struct etna_reg_desc *pos_reg =
1934          find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
1935 
1936       if (pos_reg != NULL) {
1937          /*
1938           * ADD tX.__z_, tX.zzzz, void, tX.wwww
1939           * MUL tX.__z_, tX.zzzz, 0.5, void
1940          */
1941          emit_inst(c, &(struct etna_inst) {
1942             .opcode = INST_OPCODE_ADD,
1943             .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1944             .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1945             .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
1946          });
1947          emit_inst(c, &(struct etna_inst) {
1948             .opcode = INST_OPCODE_MUL,
1949             .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1950             .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1951             .src[1] = alloc_imm_f32(c, 0.5f),
1952          });
1953       }
1954    }
1955 }
1956 
1957 static void
etna_compile_frag_rb_swap(struct etna_compile * c)1958 etna_compile_frag_rb_swap(struct etna_compile *c)
1959 {
1960    if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
1961       /* find color out */
1962       struct etna_reg_desc *color_reg =
1963          find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);
1964 
1965       emit_inst(c, &(struct etna_inst) {
1966          .opcode = INST_OPCODE_MOV,
1967          .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
1968          .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
1969       });
1970    }
1971 }
1972 
1973 /** add a NOP to the shader if
1974  * a) the shader is empty
1975  * or
1976  * b) there is a label at the end of the shader
1977  */
1978 static void
etna_compile_add_nop_if_needed(struct etna_compile * c)1979 etna_compile_add_nop_if_needed(struct etna_compile *c)
1980 {
1981    bool label_at_last_inst = false;
1982 
1983    for (int idx = 0; idx < c->labels_count; ++idx) {
1984       if (c->labels[idx].inst_idx == c->inst_ptr)
1985          label_at_last_inst = true;
1986 
1987    }
1988 
1989    if (c->inst_ptr == 0 || label_at_last_inst)
1990       emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
1991 }
1992 
1993 static void
assign_uniforms(struct etna_compile_file * file,unsigned base)1994 assign_uniforms(struct etna_compile_file *file, unsigned base)
1995 {
1996    for (int idx = 0; idx < file->reg_size; ++idx) {
1997       file->reg[idx].native.valid = 1;
1998       file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
1999       file->reg[idx].native.id = base + idx;
2000    }
2001 }
2002 
2003 /* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
2004  * CONST must be consecutive as const buffers are supposed to be consecutive,
2005  * and before IMM, as this is
2006  * more convenient because is possible for the compilation process itself to
2007  * generate extra
2008  * immediates for constants such as pi, one, zero.
2009  */
2010 static void
assign_constants_and_immediates(struct etna_compile * c)2011 assign_constants_and_immediates(struct etna_compile *c)
2012 {
2013    assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
2014    /* immediates start after the constants */
2015    c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
2016    assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
2017    DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
2018          c->imm_size);
2019 }
2020 
2021 /* Assign declared samplers to native texture units */
2022 static void
assign_texture_units(struct etna_compile * c)2023 assign_texture_units(struct etna_compile *c)
2024 {
2025    uint tex_base = 0;
2026 
2027    if (c->info.processor == PIPE_SHADER_VERTEX)
2028       tex_base = c->specs->vertex_sampler_offset;
2029 
2030    for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
2031       c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
2032       c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
2033       c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
2034    }
2035 }
2036 
2037 /* Additional pass to fill in branch targets. This pass should be last
2038  * as no instruction reordering or removing/addition can be done anymore
2039  * once the branch targets are computed.
2040  */
2041 static void
etna_compile_fill_in_labels(struct etna_compile * c)2042 etna_compile_fill_in_labels(struct etna_compile *c)
2043 {
2044    for (int idx = 0; idx < c->inst_ptr; ++idx) {
2045       if (c->lbl_usage[idx] != -1)
2046          etna_assemble_set_imm(&c->code[idx * 4],
2047                                c->labels[c->lbl_usage[idx]].inst_idx);
2048    }
2049 }
2050 
2051 /* compare two etna_native_reg structures, return true if equal */
2052 static bool
cmp_etna_native_reg(const struct etna_native_reg to,const struct etna_native_reg from)2053 cmp_etna_native_reg(const struct etna_native_reg to,
2054                     const struct etna_native_reg from)
2055 {
2056    return to.valid == from.valid && to.is_tex == from.is_tex &&
2057           to.rgroup == from.rgroup && to.id == from.id;
2058 }
2059 
2060 /* go through all declarations and swap native registers *to* and *from* */
2061 static void
swap_native_registers(struct etna_compile * c,const struct etna_native_reg to,const struct etna_native_reg from)2062 swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
2063                       const struct etna_native_reg from)
2064 {
2065    if (cmp_etna_native_reg(from, to))
2066       return; /* Nothing to do */
2067 
2068    for (int idx = 0; idx < c->total_decls; ++idx) {
2069       if (cmp_etna_native_reg(c->decl[idx].native, from)) {
2070          c->decl[idx].native = to;
2071       } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
2072          c->decl[idx].native = from;
2073       }
2074    }
2075 }
2076 
2077 /* For PS we need to permute so that inputs are always in temporary 0..N-1.
2078  * Semantic POS is always t0. If that semantic is not used, avoid t0.
2079  */
2080 static void
permute_ps_inputs(struct etna_compile * c)2081 permute_ps_inputs(struct etna_compile *c)
2082 {
2083    /* Special inputs:
2084     * gl_FragCoord   VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
2085     * gl_FrontFacing VARYING_SLOT_FACE  TGSI_SEMANTIC_FACE
2086     * gl_PointCoord  VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
2087     */
2088    uint native_idx = 1;
2089 
2090    for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2091       struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2092       uint input_id;
2093       assert(reg->has_semantic);
2094 
2095       if (!reg->active ||
2096           reg->semantic.Name == TGSI_SEMANTIC_POSITION ||
2097           reg->semantic.Name == TGSI_SEMANTIC_FACE)
2098          continue;
2099 
2100       input_id = native_idx++;
2101       swap_native_registers(c, etna_native_temp(input_id),
2102                             c->file[TGSI_FILE_INPUT].reg[idx].native);
2103    }
2104 
2105    c->num_varyings = native_idx - 1;
2106 
2107    if (native_idx > c->next_free_native)
2108       c->next_free_native = native_idx;
2109 }
2110 
sem2slot(const struct tgsi_declaration_semantic * semantic)2111 static inline int sem2slot(const struct tgsi_declaration_semantic *semantic)
2112 {
2113    return tgsi_varying_semantic_to_slot(semantic->Name, semantic->Index);
2114 }
2115 
2116 /* fill in ps inputs into shader object */
2117 static void
fill_in_ps_inputs(struct etna_shader_variant * sobj,struct etna_compile * c)2118 fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2119 {
2120    struct etna_shader_io_file *sf = &sobj->infile;
2121 
2122    sf->num_reg = 0;
2123 
2124    for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2125       struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2126 
2127       if (reg->native.id > 0) {
2128          assert(sf->num_reg < ETNA_NUM_INPUTS);
2129          sf->reg[sf->num_reg].reg = reg->native.id;
2130          sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
2131          /* convert usage mask to number of components (*=wildcard)
2132           *   .r    (0..1)  -> 1 component
2133           *   .*g   (2..3)  -> 2 component
2134           *   .**b  (4..7)  -> 3 components
2135           *   .***a (8..15) -> 4 components
2136           */
2137          sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2138          sf->num_reg++;
2139       }
2140    }
2141 
2142    assert(sf->num_reg == c->num_varyings);
2143    sobj->input_count_unk8 = 31; /* XXX what is this */
2144 }
2145 
2146 /* fill in output mapping for ps into shader object */
2147 static void
fill_in_ps_outputs(struct etna_shader_variant * sobj,struct etna_compile * c)2148 fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2149 {
2150    sobj->outfile.num_reg = 0;
2151 
2152    for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2153       struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2154 
2155       switch (reg->semantic.Name) {
2156       case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2157          sobj->ps_color_out_reg = reg->native.id;
2158          break;
2159       case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2160          sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2161          break;
2162       default:
2163          assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2164       }
2165    }
2166 }
2167 
2168 /* fill in inputs for vs into shader object */
2169 static void
fill_in_vs_inputs(struct etna_shader_variant * sobj,struct etna_compile * c)2170 fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2171 {
2172    struct etna_shader_io_file *sf = &sobj->infile;
2173 
2174    sf->num_reg = 0;
2175    for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2176       struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2177       assert(sf->num_reg < ETNA_NUM_INPUTS);
2178 
2179       if (!reg->native.valid)
2180          continue;
2181 
2182       /* XXX exclude inputs with special semantics such as gl_frontFacing */
2183       sf->reg[sf->num_reg].reg = reg->native.id;
2184       sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
2185       sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2186       sf->num_reg++;
2187    }
2188 
2189    sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
2190 }
2191 
2192 /* fill in outputs for vs into shader object */
2193 static void
fill_in_vs_outputs(struct etna_shader_variant * sobj,struct etna_compile * c)2194 fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2195 {
2196    struct etna_shader_io_file *sf = &sobj->outfile;
2197 
2198    sf->num_reg = 0;
2199    for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2200       struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2201       assert(sf->num_reg < ETNA_NUM_INPUTS);
2202 
2203       switch (reg->semantic.Name) {
2204       case TGSI_SEMANTIC_POSITION:
2205          sobj->vs_pos_out_reg = reg->native.id;
2206          break;
2207       case TGSI_SEMANTIC_PSIZE:
2208          sobj->vs_pointsize_out_reg = reg->native.id;
2209          break;
2210       default:
2211          sf->reg[sf->num_reg].reg = reg->native.id;
2212          sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
2213          sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
2214          sf->num_reg++;
2215       }
2216    }
2217 
2218    /* fill in "mystery meat" load balancing value. This value determines how
2219     * work is scheduled between VS and PS
2220     * in the unified shader architecture. More precisely, it is determined from
2221     * the number of VS outputs, as well as chip-specific
2222     * vertex output buffer size, vertex cache size, and the number of shader
2223     * cores.
2224     *
2225     * XXX this is a conservative estimate, the "optimal" value is only known for
2226     * sure at link time because some
2227     * outputs may be unused and thus unmapped. Then again, in the general use
2228     * case with GLSL the vertex and fragment
2229     * shaders are linked already before submitting to Gallium, thus all outputs
2230     * are used.
2231     */
2232    int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
2233    assert(half_out);
2234 
2235    uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
2236                            2 * half_out * c->specs->vertex_cache_size)) +
2237                  9) /
2238                 10;
2239    uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
2240    sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
2241                              VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
2242                              VIVS_VS_LOAD_BALANCING_C(0x3f) |
2243                              VIVS_VS_LOAD_BALANCING_D(0x0f);
2244 }
2245 
2246 static bool
etna_compile_check_limits(struct etna_compile * c)2247 etna_compile_check_limits(struct etna_compile *c)
2248 {
2249    int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
2250                          ? c->specs->max_vs_uniforms
2251                          : c->specs->max_ps_uniforms;
2252    /* round up number of uniforms, including immediates, in units of four */
2253    int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
2254 
2255    if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
2256       DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
2257           c->specs->max_instructions);
2258       return false;
2259    }
2260 
2261    if (c->next_free_native > c->specs->max_registers) {
2262       DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
2263           c->specs->max_registers);
2264       return false;
2265    }
2266 
2267    if (num_uniforms > max_uniforms) {
2268       DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
2269           max_uniforms);
2270       return false;
2271    }
2272 
2273    if (c->num_varyings > c->specs->max_varyings) {
2274       DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
2275           c->specs->max_varyings);
2276       return false;
2277    }
2278 
2279    if (c->imm_base > c->specs->num_constants) {
2280       DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
2281           c->specs->num_constants);
2282    }
2283 
2284    return true;
2285 }
2286 
2287 static void
copy_uniform_state_to_shader(struct etna_compile * c,struct etna_shader_variant * sobj)2288 copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
2289 {
2290    uint32_t count = c->imm_base + c->imm_size;
2291    struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
2292 
2293    uinfo->imm_count = count;
2294 
2295    uinfo->imm_data = malloc(count * sizeof(*c->imm_data));
2296    for (unsigned i = 0; i < c->imm_base; i++)
2297       uinfo->imm_data[i] = i;
2298    memcpy(&uinfo->imm_data[c->imm_base], c->imm_data, c->imm_size * sizeof(*c->imm_data));
2299 
2300    uinfo->imm_contents = malloc(count * sizeof(*c->imm_contents));
2301    for (unsigned i = 0; i < c->imm_base; i++)
2302       uinfo->imm_contents[i] = ETNA_IMMEDIATE_UNIFORM;
2303    memcpy(&uinfo->imm_contents[c->imm_base], c->imm_contents, c->imm_size * sizeof(*c->imm_contents));
2304 
2305    etna_set_shader_uniforms_dirty_flags(sobj);
2306 }
2307 
2308 bool
etna_compile_shader(struct etna_shader_variant * v)2309 etna_compile_shader(struct etna_shader_variant *v)
2310 {
2311    if (DBG_ENABLED(ETNA_DBG_NIR))
2312       return etna_compile_shader_nir(v);
2313 
2314    /* Create scratch space that may be too large to fit on stack
2315     */
2316    bool ret;
2317    struct etna_compile *c;
2318 
2319    if (unlikely(!v))
2320       return false;
2321 
2322    const struct etna_specs *specs = v->shader->specs;
2323 
2324    struct tgsi_lowering_config lconfig = {
2325       .lower_FLR = !specs->has_sign_floor_ceil,
2326       .lower_CEIL = !specs->has_sign_floor_ceil,
2327       .lower_POW = true,
2328       .lower_EXP = true,
2329       .lower_LOG = true,
2330       .lower_DP2 = !specs->has_halti2_instructions,
2331       .lower_TRUNC = true,
2332    };
2333 
2334    c = CALLOC_STRUCT(etna_compile);
2335    if (!c)
2336       return false;
2337 
2338    memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));
2339 
2340    const struct tgsi_token *tokens = v->shader->tokens;
2341 
2342    c->specs = specs;
2343    c->key = &v->key;
2344    c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
2345    c->free_tokens = !!c->tokens;
2346    if (!c->tokens) {
2347       /* no lowering */
2348       c->tokens = tokens;
2349    }
2350 
2351    /* Build a map from gallium register to native registers for files
2352     * CONST, SAMP, IMM, OUT, IN, TEMP.
2353     * SAMP will map as-is for fragment shaders, there will be a +8 offset for
2354     * vertex shaders.
2355     */
2356    /* Pass one -- check register file declarations and immediates */
2357    etna_compile_parse_declarations(c);
2358 
2359    etna_allocate_decls(c);
2360 
2361    /* Pass two -- check usage of temporaries, inputs, outputs */
2362    etna_compile_pass_check_usage(c);
2363 
2364    assign_special_inputs(c);
2365 
2366    /* Assign native temp register to TEMPs */
2367    assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);
2368 
2369    /* optimize outputs */
2370    etna_compile_pass_optimize_outputs(c);
2371 
2372    /* assign inputs: last usage of input should be <= first usage of temp */
2373    /*   potential optimization case:
2374     *     if single MOV TEMP[y], IN[x] before which temp y is not used, and
2375     * after which IN[x]
2376     *     is not read, temp[y] can be used as input register as-is
2377     */
2378    /*   sort temporaries by first use
2379     *   sort inputs by last usage
2380     *   iterate over inputs, temporaries
2381     *     if last usage of input <= first usage of temp:
2382     *       assign input to temp
2383     *       advance input, temporary pointer
2384     *     else
2385     *       advance temporary pointer
2386     *
2387     *   potential problem: instruction with multiple inputs of which one is the
2388     * temp and the other is the input;
2389     *      however, as the temp is not used before this, how would this make
2390     * sense? uninitialized temporaries have an undefined
2391     *      value, so this would be ok
2392     */
2393    assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);
2394 
2395    /* assign outputs: first usage of output should be >= last usage of temp */
2396    /*   potential optimization case:
2397     *      if single MOV OUT[x], TEMP[y] (with full write mask, or at least
2398     * writing all components that are used in
2399     *        the shader) after which temp y is no longer used temp[y] can be
2400     * used as output register as-is
2401     *
2402     *   potential problem: instruction with multiple outputs of which one is the
2403     * temp and the other is the output;
2404     *      however, as the temp is not used after this, how would this make
2405     * sense? could just discard the output value
2406     */
2407    /*   sort temporaries by last use
2408     *   sort outputs by first usage
2409     *   iterate over outputs, temporaries
2410     *     if first usage of output >= last usage of temp:
2411     *       assign output to temp
2412     *       advance output, temporary pointer
2413     *     else
2414     *       advance temporary pointer
2415     */
2416    assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);
2417 
2418    assign_constants_and_immediates(c);
2419    assign_texture_units(c);
2420 
2421    /* list declarations */
2422    for (int x = 0; x < c->total_decls; ++x) {
2423       DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2424                                     "last_use=%i native=%i usage_mask=%x "
2425                                     "has_semantic=%i",
2426             x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2427             c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2428             c->decl[x].native.valid ? c->decl[x].native.id : -1,
2429             c->decl[x].usage_mask, c->decl[x].has_semantic);
2430       if (c->decl[x].has_semantic)
2431          DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2432                tgsi_semantic_names[c->decl[x].semantic.Name],
2433                c->decl[x].semantic.Index);
2434    }
2435    /* XXX for PS we need to permute so that inputs are always in temporary
2436     * 0..N-1.
2437     * There is no "switchboard" for varyings (AFAIK!). The output color,
2438     * however, can be routed
2439     * from an arbitrary temporary.
2440     */
2441    if (c->info.processor == PIPE_SHADER_FRAGMENT)
2442       permute_ps_inputs(c);
2443 
2444 
2445    /* list declarations */
2446    for (int x = 0; x < c->total_decls; ++x) {
2447       DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2448                                     "last_use=%i native=%i usage_mask=%x "
2449                                     "has_semantic=%i",
2450             x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2451             c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2452             c->decl[x].native.valid ? c->decl[x].native.id : -1,
2453             c->decl[x].usage_mask, c->decl[x].has_semantic);
2454       if (c->decl[x].has_semantic)
2455          DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2456                tgsi_semantic_names[c->decl[x].semantic.Name],
2457                c->decl[x].semantic.Index);
2458    }
2459 
2460    /* pass 3: generate instructions */
2461    etna_compile_pass_generate_code(c);
2462    etna_compile_add_z_div_if_needed(c);
2463    etna_compile_frag_rb_swap(c);
2464    etna_compile_add_nop_if_needed(c);
2465 
2466    ret = etna_compile_check_limits(c);
2467    if (!ret)
2468       goto out;
2469 
2470    etna_compile_fill_in_labels(c);
2471 
2472    /* fill in output structure */
2473    v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX;
2474    v->uses_discard = c->info.uses_kill;
2475    v->code_size = c->inst_ptr * 4;
2476    v->code = mem_dup(c->code, c->inst_ptr * 16);
2477    v->num_loops = c->num_loops;
2478    v->num_temps = c->next_free_native;
2479    v->vs_id_in_reg = -1;
2480    v->vs_pos_out_reg = -1;
2481    v->vs_pointsize_out_reg = -1;
2482    v->ps_color_out_reg = -1;
2483    v->ps_depth_out_reg = -1;
2484    v->needs_icache = c->inst_ptr > c->specs->max_instructions;
2485    copy_uniform_state_to_shader(c, v);
2486 
2487    if (c->info.processor == PIPE_SHADER_VERTEX) {
2488       fill_in_vs_inputs(v, c);
2489       fill_in_vs_outputs(v, c);
2490    } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
2491       fill_in_ps_inputs(v, c);
2492       fill_in_ps_outputs(v, c);
2493    }
2494 
2495 out:
2496    if (c->free_tokens)
2497       FREE((void *)c->tokens);
2498 
2499    FREE(c->labels);
2500    FREE(c);
2501 
2502    return ret;
2503 }
2504 
2505 static const struct etna_shader_inout *
etna_shader_vs_lookup(const struct etna_shader_variant * sobj,const struct etna_shader_inout * in)2506 etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
2507                       const struct etna_shader_inout *in)
2508 {
2509    for (int i = 0; i < sobj->outfile.num_reg; i++)
2510       if (sobj->outfile.reg[i].slot == in->slot)
2511          return &sobj->outfile.reg[i];
2512 
2513    return NULL;
2514 }
2515 
2516 bool
etna_link_shader(struct etna_shader_link_info * info,const struct etna_shader_variant * vs,const struct etna_shader_variant * fs)2517 etna_link_shader(struct etna_shader_link_info *info,
2518                  const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
2519 {
2520    int comp_ofs = 0;
2521    /* For each fragment input we need to find the associated vertex shader
2522     * output, which can be found by matching on semantic name and index. A
2523     * binary search could be used because the vs outputs are sorted by their
2524     * semantic index and grouped by semantic type by fill_in_vs_outputs.
2525     */
2526    assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
2527    info->pcoord_varying_comp_ofs = -1;
2528 
2529    for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
2530       const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
2531       const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
2532       struct etna_varying *varying;
2533       bool interpolate_always = ((fsio->slot != VARYING_SLOT_COL0) &&
2534                                  (fsio->slot != VARYING_SLOT_COL1));
2535 
2536       assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
2537 
2538       if (fsio->reg > info->num_varyings)
2539          info->num_varyings = fsio->reg;
2540 
2541       varying = &info->varyings[fsio->reg - 1];
2542       varying->num_components = fsio->num_components;
2543 
2544       if (!interpolate_always) /* colors affected by flat shading */
2545          varying->pa_attributes = 0x200;
2546       else /* texture coord or other bypasses flat shading */
2547          varying->pa_attributes = 0x2f1;
2548 
2549       varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
2550       varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
2551       varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
2552       varying->use[3] = VARYING_COMPONENT_USE_UNUSED;
2553 
2554       /* point coord is an input to the PS without matching VS output,
2555        * so it gets a varying slot without being assigned a VS register.
2556        */
2557       if (fsio->slot == VARYING_SLOT_PNTC) {
2558          varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
2559          varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
2560 
2561          info->pcoord_varying_comp_ofs = comp_ofs;
2562       } else {
2563          if (vsio == NULL) { /* not found -- link error */
2564             BUG("Semantic value not found in vertex shader outputs\n");
2565             return true;
2566          }
2567 
2568          varying->reg = vsio->reg;
2569       }
2570 
2571       comp_ofs += varying->num_components;
2572    }
2573 
2574    assert(info->num_varyings == fs->infile.num_reg);
2575 
2576    return false;
2577 }
2578