1 /*
2  * Copyright (c) 2012-2015 Etnaviv Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Wladimir J. van der Laan <laanwj@gmail.com>
25  */
26 
27 /* TGSI->Vivante shader ISA conversion */
28 
29 /* What does the compiler return (see etna_shader_object)?
30  *  1) instruction data
31  *  2) input-to-temporary mapping (fixed for ps)
32  *      *) in case of ps, semantic -> varying id mapping
33  *      *) for each varying: number of components used (r, rg, rgb, rgba)
34  *  3) temporary-to-output mapping (in case of vs, fixed for ps)
35  *  4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36  *  5) immediates base offset, immediates data
37  *  6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38  *     configure the hw, but useful for error checking
39  *  7) enough information to add the z=(z+w)/2.0 necessary for older chips
40  *     (output reg id is enough)
41  *
42  *  Empty shaders are not allowed, should always at least generate a NOP. Also
43  *  if there is a label at the end of the shader, an extra NOP should be
44  *  generated as jump target.
45  *
46  * TODO
47  * * Use an instruction scheduler
48  * * Indirect access to uniforms / temporaries using amode
49  */
50 
51 #include "etnaviv_compiler.h"
52 
53 #include "etnaviv_asm.h"
54 #include "etnaviv_context.h"
55 #include "etnaviv_debug.h"
56 #include "etnaviv_disasm.h"
57 #include "etnaviv_uniforms.h"
58 #include "etnaviv_util.h"
59 
60 #include "pipe/p_shader_tokens.h"
61 #include "tgsi/tgsi_info.h"
62 #include "tgsi/tgsi_iterate.h"
63 #include "tgsi/tgsi_lowering.h"
64 #include "tgsi/tgsi_strings.h"
65 #include "tgsi/tgsi_util.h"
66 #include "util/u_math.h"
67 #include "util/u_memory.h"
68 
69 #include <fcntl.h>
70 #include <stdio.h>
71 #include <sys/stat.h>
72 #include <sys/types.h>
73 
74 #define ETNA_MAX_INNER_TEMPS 2
75 
76 static const float sincos_const[2][4] = {
77    {
78       2., -1., 4., -4.,
79    },
80    {
81       1. / (2. * M_PI), 0.75, 0.5, 0.0,
82    },
83 };
84 
/* Describes the native (hardware) register that a TGSI register maps to. */
struct etna_native_reg {
   unsigned valid : 1;  /* non-zero once a native register was assigned */
   unsigned is_tex : 1; /* register is a texture unit; overrides rgroup */
   unsigned rgroup : 3; /* native register group (INST_RGROUP_*) */
   unsigned id : 9;     /* register number inside the group */
};
92 
93 /* Register description */
94 struct etna_reg_desc {
95    enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
96    int idx; /* index into file */
97    bool active; /* used in program */
98    int first_use; /* instruction id of first use (scope begin) */
99    int last_use; /* instruction id of last use (scope end, inclusive) */
100 
101    struct etna_native_reg native; /* native register to map to */
102    unsigned usage_mask : 4; /* usage, per channel */
103    bool has_semantic; /* register has associated TGSI semantic */
104    struct tgsi_declaration_semantic semantic; /* TGSI semantic */
105    struct tgsi_declaration_interp interp; /* Interpolation type */
106 };
107 
/* A jump label: records which instruction the label resolves to. */
struct etna_compile_label {
   /* index of the instruction this label points at */
   int inst_idx;
};
112 
/* Kind of control-flow construct a nesting frame belongs to. */
enum etna_compile_frame_type {
   /* conditional block: IF / ELSE / ENDIF */
   ETNA_COMPILE_FRAME_IF,
   /* loop block */
   ETNA_COMPILE_FRAME_LOOP,
};
117 
118 /* nesting scope frame (LOOP, IF, ...) during compilation
119  */
120 struct etna_compile_frame {
121    enum etna_compile_frame_type type;
122    int lbl_else_idx;
123    int lbl_endif_idx;
124    int lbl_loop_bgn_idx;
125    int lbl_loop_end_idx;
126 };
127 
/* View of a single TGSI register file. */
struct etna_compile_file {
   /* number of registers in this file (highest register index + 1) */
   size_t reg_size;
   /* register descriptions, indexed by register index */
   struct etna_reg_desc *reg;
};
134 
/* Append `val` to the growable array `arr`.
 *
 * `arr` is an lvalue of pointer type; the companion lvalues holding the
 * number of used elements and the allocated capacity (in elements) are
 * formed by token-pasting `_count` / `_sz` onto `arr` (e.g. `c->labels`
 * uses `c->labels_count` and `c->labels_sz`). Capacity doubles when full,
 * with a minimum of 16 elements.
 *
 * The result of realloc() is checked before it replaces `arr`: on
 * allocation failure the process is aborted rather than losing the old
 * buffer and storing through a NULL pointer.
 */
#define array_insert(arr, val)                                             \
   do {                                                                    \
      if (arr##_count == arr##_sz) {                                       \
         void *array_insert_grown_;                                        \
         arr##_sz = MAX2(2 * arr##_sz, 16);                                \
         array_insert_grown_ = realloc(arr, arr##_sz * sizeof((arr)[0]));  \
         if (!array_insert_grown_)                                         \
            abort();                                                       \
         arr = array_insert_grown_;                                        \
      }                                                                    \
      (arr)[arr##_count++] = (val);                                        \
   } while (0)
143 
144 
145 /* scratch area for compiling shader, freed after compilation finishes */
146 struct etna_compile {
147    const struct tgsi_token *tokens;
148    bool free_tokens;
149 
150    struct tgsi_shader_info info;
151 
152    /* Register descriptions, per TGSI file, per register index */
153    struct etna_compile_file file[TGSI_FILE_COUNT];
154 
155    /* Keep track of TGSI register declarations */
156    struct etna_reg_desc decl[ETNA_MAX_DECL];
157    uint total_decls;
158 
159    /* Bitmap of dead instructions which are removed in a separate pass */
160    bool dead_inst[ETNA_MAX_TOKENS];
161 
162    /* Immediate data */
163    enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
164    uint32_t imm_data[ETNA_MAX_IMM];
165    uint32_t imm_base; /* base of immediates (in 32 bit units) */
166    uint32_t imm_size; /* size of immediates (in 32 bit units) */
167 
168    /* Next free native register, for register allocation */
169    uint32_t next_free_native;
170 
171    /* Temporary register for use within translated TGSI instruction,
172     * only allocated when needed.
173     */
174    int inner_temps; /* number of inner temps used; only up to one available at
175                        this point */
176    struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];
177 
178    /* Fields for handling nested conditionals */
179    struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
180    int frame_sp;
181    int lbl_usage[ETNA_MAX_INSTRUCTIONS];
182 
183    unsigned labels_count, labels_sz;
184    struct etna_compile_label *labels;
185 
186    unsigned num_loops;
187 
188    /* Code generation */
189    int inst_ptr; /* current instruction pointer */
190    uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];
191 
192    /* I/O */
193 
194    /* Number of varyings (PS only) */
195    int num_varyings;
196 
197    /* GPU hardware specs */
198    const struct etna_specs *specs;
199 
200    const struct etna_shader_key *key;
201 };
202 
203 static struct etna_reg_desc *
etna_get_dst_reg(struct etna_compile * c,struct tgsi_dst_register dst)204 etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
205 {
206    return &c->file[dst.File].reg[dst.Index];
207 }
208 
209 static struct etna_reg_desc *
etna_get_src_reg(struct etna_compile * c,struct tgsi_src_register src)210 etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
211 {
212    return &c->file[src.File].reg[src.Index];
213 }
214 
215 static struct etna_native_reg
etna_native_temp(unsigned reg)216 etna_native_temp(unsigned reg)
217 {
218    return (struct etna_native_reg) {
219       .valid = 1,
220       .rgroup = INST_RGROUP_TEMP,
221       .id = reg
222    };
223 }
224 
225 /** Register allocation **/
/* Sort criterion for sort_registers(): which end of a register's live
 * range is used as key, and in which direction.
 */
enum reg_sort_order {
   FIRST_USE_ASC,  /* first use, ascending */
   FIRST_USE_DESC, /* first use, descending */
   LAST_USE_ASC,   /* last use, ascending */
   LAST_USE_DESC   /* last use, descending */
};
232 
/* Augmented register description for sorting */
struct sort_rec {
   struct etna_reg_desc *ptr; /* register this record refers to */
   int key;                   /* value the records are ordered by */
};

/* qsort() comparator: orders sort_rec entries by ascending key.
 *
 * The function has exactly the prototype qsort() expects. Calling a
 * function through a pointer to an incompatible function type is undefined
 * behaviour in C, so the parameters are taken as `const void *` and
 * converted here instead of relying on a cast at the call site.
 *
 * Returns -1, 0 or 1 when a's key is less than, equal to, or greater than
 * b's key.
 */
static int
sort_rec_compar(const void *a, const void *b)
{
   const struct sort_rec *lhs = a;
   const struct sort_rec *rhs = b;

   if (lhs->key < rhs->key)
      return -1;

   if (lhs->key > rhs->key)
      return 1;

   return 0;
}
250 
251 /* create an index on a register set based on certain criteria. */
252 static int
sort_registers(struct sort_rec * sorted,struct etna_compile_file * file,enum reg_sort_order so)253 sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
254                enum reg_sort_order so)
255 {
256    struct etna_reg_desc *regs = file->reg;
257    int ptr = 0;
258 
259    /* pre-populate keys from active registers */
260    for (int idx = 0; idx < file->reg_size; ++idx) {
261       /* only interested in active registers now; will only assign inactive ones
262        * if no space in active ones */
263       if (regs[idx].active) {
264          sorted[ptr].ptr = &regs[idx];
265 
266          switch (so) {
267          case FIRST_USE_ASC:
268             sorted[ptr].key = regs[idx].first_use;
269             break;
270          case LAST_USE_ASC:
271             sorted[ptr].key = regs[idx].last_use;
272             break;
273          case FIRST_USE_DESC:
274             sorted[ptr].key = -regs[idx].first_use;
275             break;
276          case LAST_USE_DESC:
277             sorted[ptr].key = -regs[idx].last_use;
278             break;
279          }
280          ptr++;
281       }
282    }
283 
284    /* sort index by key */
285    qsort(sorted, ptr, sizeof(struct sort_rec),
286          (int (*)(const void *, const void *))sort_rec_compar);
287 
288    return ptr;
289 }
290 
291 /* Allocate a new, unused, native temp register */
292 static struct etna_native_reg
alloc_new_native_reg(struct etna_compile * c)293 alloc_new_native_reg(struct etna_compile *c)
294 {
295    assert(c->next_free_native < ETNA_MAX_TEMPS);
296    return etna_native_temp(c->next_free_native++);
297 }
298 
299 /* assign TEMPs to native registers */
300 static void
assign_temporaries_to_native(struct etna_compile * c,struct etna_compile_file * file)301 assign_temporaries_to_native(struct etna_compile *c,
302                              struct etna_compile_file *file)
303 {
304    struct etna_reg_desc *temps = file->reg;
305 
306    for (int idx = 0; idx < file->reg_size; ++idx)
307       temps[idx].native = alloc_new_native_reg(c);
308 }
309 
310 /* assign inputs and outputs to temporaries
311  * Gallium assumes that the hardware has separate registers for taking input and
312  * output, however Vivante GPUs use temporaries both for passing in inputs and
313  * passing back outputs.
314  * Try to re-use temporary registers where possible. */
315 static void
assign_inouts_to_temporaries(struct etna_compile * c,uint file)316 assign_inouts_to_temporaries(struct etna_compile *c, uint file)
317 {
318    bool mode_inputs = (file == TGSI_FILE_INPUT);
319    int inout_ptr = 0, num_inouts;
320    int temp_ptr = 0, num_temps;
321    struct sort_rec inout_order[ETNA_MAX_TEMPS];
322    struct sort_rec temps_order[ETNA_MAX_TEMPS];
323    num_inouts = sort_registers(inout_order, &c->file[file],
324                                mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
325    num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
326                               mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);
327 
328    while (inout_ptr < num_inouts && temp_ptr < num_temps) {
329       struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
330       struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;
331 
332       if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
333          inout_ptr++;
334          continue;
335       }
336 
337       /* last usage of this input is before or in same instruction of first use
338        * of temporary? */
339       if (mode_inputs ? (inout->last_use <= temp->first_use)
340                       : (inout->first_use >= temp->last_use)) {
341          /* assign it and advance to next input */
342          inout->native = temp->native;
343          inout_ptr++;
344       }
345 
346       temp_ptr++;
347    }
348 
349    /* if we couldn't reuse current ones, allocate new temporaries */
350    for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
351       struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
352 
353       if (inout->active && !inout->native.valid)
354          inout->native = alloc_new_native_reg(c);
355    }
356 }
357 
358 /* Allocate an immediate with a certain value and return the index. If
359  * there is already an immediate with that value, return that.
360  */
361 static struct etna_inst_src
alloc_imm(struct etna_compile * c,enum etna_immediate_contents contents,uint32_t value)362 alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
363           uint32_t value)
364 {
365    int idx;
366 
367    /* Could use a hash table to speed this up */
368    for (idx = 0; idx < c->imm_size; ++idx) {
369       if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
370          break;
371    }
372 
373    /* look if there is an unused slot */
374    if (idx == c->imm_size) {
375       for (idx = 0; idx < c->imm_size; ++idx) {
376          if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
377             break;
378       }
379    }
380 
381    /* allocate new immediate */
382    if (idx == c->imm_size) {
383       assert(c->imm_size < ETNA_MAX_IMM);
384       idx = c->imm_size++;
385       c->imm_data[idx] = value;
386       c->imm_contents[idx] = contents;
387    }
388 
389    /* swizzle so that component with value is returned in all components */
390    idx += c->imm_base;
391    struct etna_inst_src imm_src = {
392       .use = 1,
393       .rgroup = INST_RGROUP_UNIFORM_0,
394       .reg = idx / 4,
395       .swiz = INST_SWIZ_BROADCAST(idx & 3)
396    };
397 
398    return imm_src;
399 }
400 
401 static struct etna_inst_src
alloc_imm_u32(struct etna_compile * c,uint32_t value)402 alloc_imm_u32(struct etna_compile *c, uint32_t value)
403 {
404    return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
405 }
406 
407 static struct etna_inst_src
alloc_imm_vec4u(struct etna_compile * c,enum etna_immediate_contents contents,const uint32_t * values)408 alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
409                 const uint32_t *values)
410 {
411    struct etna_inst_src imm_src = { };
412    int idx, i;
413 
414    for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
415       /* What if we can use a uniform with a different swizzle? */
416       for (i = 0; i < 4; i++)
417          if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
418             break;
419       if (i == 4)
420          break;
421    }
422 
423    if (idx + 3 >= c->imm_size) {
424       idx = align(c->imm_size, 4);
425       assert(idx + 4 <= ETNA_MAX_IMM);
426 
427       for (i = 0; i < 4; i++) {
428          c->imm_data[idx + i] = values[i];
429          c->imm_contents[idx + i] = contents;
430       }
431 
432       c->imm_size = idx + 4;
433    }
434 
435    assert((c->imm_base & 3) == 0);
436    idx += c->imm_base;
437    imm_src.use = 1;
438    imm_src.rgroup = INST_RGROUP_UNIFORM_0;
439    imm_src.reg = idx / 4;
440    imm_src.swiz = INST_SWIZ_IDENTITY;
441 
442    return imm_src;
443 }
444 
445 static uint32_t
get_imm_u32(struct etna_compile * c,const struct etna_inst_src * imm,unsigned swiz_idx)446 get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
447             unsigned swiz_idx)
448 {
449    assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
450    unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
451 
452    return c->imm_data[idx];
453 }
454 
455 /* Allocate immediate with a certain float value. If there is already an
456  * immediate with that value, return that.
457  */
458 static struct etna_inst_src
alloc_imm_f32(struct etna_compile * c,float value)459 alloc_imm_f32(struct etna_compile *c, float value)
460 {
461    return alloc_imm_u32(c, fui(value));
462 }
463 
464 static struct etna_inst_src
etna_imm_vec4f(struct etna_compile * c,const float * vec4)465 etna_imm_vec4f(struct etna_compile *c, const float *vec4)
466 {
467    uint32_t val[4];
468 
469    for (int i = 0; i < 4; i++)
470       val[i] = fui(vec4[i]);
471 
472    return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
473 }
474 
475 /* Pass -- check register file declarations and immediates */
476 static void
etna_compile_parse_declarations(struct etna_compile * c)477 etna_compile_parse_declarations(struct etna_compile *c)
478 {
479    struct tgsi_parse_context ctx = { };
480    unsigned status = TGSI_PARSE_OK;
481    status = tgsi_parse_init(&ctx, c->tokens);
482    assert(status == TGSI_PARSE_OK);
483 
484    while (!tgsi_parse_end_of_tokens(&ctx)) {
485       tgsi_parse_token(&ctx);
486 
487       switch (ctx.FullToken.Token.Type) {
488       case TGSI_TOKEN_TYPE_IMMEDIATE: {
489          /* immediates are handled differently from other files; they are
490           * not declared explicitly, and always add four components */
491          const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
492          assert(c->imm_size <= (ETNA_MAX_IMM - 4));
493 
494          for (int i = 0; i < 4; ++i) {
495             unsigned idx = c->imm_size++;
496 
497             c->imm_data[idx] = imm->u[i].Uint;
498             c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
499          }
500       }
501       break;
502       }
503    }
504 
505    tgsi_parse_free(&ctx);
506 }
507 
508 /* Allocate register declarations for the registers in all register files */
509 static void
etna_allocate_decls(struct etna_compile * c)510 etna_allocate_decls(struct etna_compile *c)
511 {
512    uint idx = 0;
513 
514    for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
515       c->file[x].reg = &c->decl[idx];
516       c->file[x].reg_size = c->info.file_max[x] + 1;
517 
518       for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
519          c->decl[idx].file = x;
520          c->decl[idx].idx = sub;
521          idx++;
522       }
523    }
524 
525    c->total_decls = idx;
526 }
527 
528 /* Pass -- check and record usage of temporaries, inputs, outputs */
529 static void
etna_compile_pass_check_usage(struct etna_compile * c)530 etna_compile_pass_check_usage(struct etna_compile *c)
531 {
532    struct tgsi_parse_context ctx = { };
533    unsigned status = TGSI_PARSE_OK;
534    status = tgsi_parse_init(&ctx, c->tokens);
535    assert(status == TGSI_PARSE_OK);
536 
537    for (int idx = 0; idx < c->total_decls; ++idx) {
538       c->decl[idx].active = false;
539       c->decl[idx].first_use = c->decl[idx].last_use = -1;
540    }
541 
542    int inst_idx = 0;
543    while (!tgsi_parse_end_of_tokens(&ctx)) {
544       tgsi_parse_token(&ctx);
545       /* find out max register #s used
546        * For every register mark first and last instruction index where it's
547        * used this allows finding ranges where the temporary can be borrowed
548        * as input and/or output register
549        *
550        * XXX in the case of loops this needs special care, or even be completely
551        * disabled, as
552        * the last usage of a register inside a loop means it can still be used
553        * on next loop
554        * iteration (execution is no longer * chronological). The register can
555        * only be
556        * declared "free" after the loop finishes.
557        *
558        * Same for inputs: the first usage of a register inside a loop doesn't
559        * mean that the register
560        * won't have been overwritten in previous iteration. The register can
561        * only be declared free before the loop
562        * starts.
563        * The proper way would be to do full dominator / post-dominator analysis
564        * (especially with more complicated
565        * control flow such as direct branch instructions) but not for now...
566        */
567       switch (ctx.FullToken.Token.Type) {
568       case TGSI_TOKEN_TYPE_DECLARATION: {
569          /* Declaration: fill in file details */
570          const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
571          struct etna_compile_file *file = &c->file[decl->Declaration.File];
572 
573          for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
574             file->reg[idx].usage_mask = 0; // we'll compute this ourselves
575             file->reg[idx].has_semantic = decl->Declaration.Semantic;
576             file->reg[idx].semantic = decl->Semantic;
577             file->reg[idx].interp = decl->Interp;
578          }
579       } break;
580       case TGSI_TOKEN_TYPE_INSTRUCTION: {
581          /* Instruction: iterate over operands of instruction */
582          const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
583 
584          /* iterate over destination registers */
585          for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
586             struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];
587 
588             if (reg_desc->first_use == -1)
589                reg_desc->first_use = inst_idx;
590 
591             reg_desc->last_use = inst_idx;
592             reg_desc->active = true;
593          }
594 
595          /* iterate over source registers */
596          for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
597             struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];
598 
599             if (reg_desc->first_use == -1)
600                reg_desc->first_use = inst_idx;
601 
602             reg_desc->last_use = inst_idx;
603             reg_desc->active = true;
604             /* accumulate usage mask for register, this is used to determine how
605              * many slots for varyings
606              * should be allocated */
607             reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
608          }
609          inst_idx += 1;
610       } break;
611       default:
612          break;
613       }
614    }
615 
616    tgsi_parse_free(&ctx);
617 }
618 
619 /* assign inputs that need to be assigned to specific registers */
620 static void
assign_special_inputs(struct etna_compile * c)621 assign_special_inputs(struct etna_compile *c)
622 {
623    if (c->info.processor == PIPE_SHADER_FRAGMENT) {
624       /* never assign t0 as it is the position output, start assigning at t1 */
625       c->next_free_native = 1;
626 
627       /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
628       for (int idx = 0; idx < c->total_decls; ++idx) {
629          struct etna_reg_desc *reg = &c->decl[idx];
630 
631          if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
632             reg->native = etna_native_temp(0);
633       }
634    }
635 }
636 
637 /* Check that a move instruction does not swizzle any of the components
638  * that it writes.
639  */
640 static bool
etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,const struct tgsi_src_register src)641 etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
642                           const struct tgsi_src_register src)
643 {
644    return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
645           (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
646           (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
647           (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
648 }
649 
650 /* Pass -- optimize outputs
651  * Mesa tends to generate code like this at the end if their shaders
652  *   MOV OUT[1], TEMP[2]
653  *   MOV OUT[0], TEMP[0]
654  *   MOV OUT[2], TEMP[1]
655  * Recognize if
656  * a) there is only a single assignment to an output register and
657  * b) the temporary is not used after that
658  * Also recognize direct assignment of IN to OUT (passthrough)
659  **/
660 static void
etna_compile_pass_optimize_outputs(struct etna_compile * c)661 etna_compile_pass_optimize_outputs(struct etna_compile *c)
662 {
663    struct tgsi_parse_context ctx = { };
664    int inst_idx = 0;
665    unsigned status = TGSI_PARSE_OK;
666    status = tgsi_parse_init(&ctx, c->tokens);
667    assert(status == TGSI_PARSE_OK);
668 
669    while (!tgsi_parse_end_of_tokens(&ctx)) {
670       tgsi_parse_token(&ctx);
671 
672       switch (ctx.FullToken.Token.Type) {
673       case TGSI_TOKEN_TYPE_INSTRUCTION: {
674          const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
675 
676          /* iterate over operands */
677          switch (inst->Instruction.Opcode) {
678          case TGSI_OPCODE_MOV: {
679             /* We are only interested in eliminating MOVs which write to
680              * the shader outputs. Test for this early. */
681             if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
682                break;
683             /* Elimination of a MOV must have no visible effect on the
684              * resulting shader: this means the MOV must not swizzle or
685              * saturate, and its source must not have the negate or
686              * absolute modifiers. */
687             if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
688                 inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
689                 inst->Src[0].Register.Absolute)
690                break;
691 
692             uint out_idx = inst->Dst[0].Register.Index;
693             uint in_idx = inst->Src[0].Register.Index;
694             /* assignment of temporary to output --
695              * and the output doesn't yet have a native register assigned
696              * and the last use of the temporary is this instruction
697              * and the MOV does not do a swizzle
698              */
699             if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
700                 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
701                 c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
702                c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
703                   c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
704                /* prevent temp from being re-used for the rest of the shader */
705                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
706                /* mark this MOV instruction as a no-op */
707                c->dead_inst[inst_idx] = true;
708             }
709             /* direct assignment of input to output --
710              * and the input or output doesn't yet have a native register
711              * assigned
712              * and the output is only used in this instruction,
713              * allocate a new register, and associate both input and output to
714              * it
715              * and the MOV does not do a swizzle
716              */
717             if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
718                 !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
719                 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
720                 c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
721                 c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
722                c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
723                   c->file[TGSI_FILE_INPUT].reg[in_idx].native =
724                      alloc_new_native_reg(c);
725                /* mark this MOV instruction as a no-op */
726                c->dead_inst[inst_idx] = true;
727             }
728          } break;
729          default:;
730          }
731          inst_idx += 1;
732       } break;
733       }
734    }
735 
736    tgsi_parse_free(&ctx);
737 }
738 
739 /* Get a temporary to be used within one TGSI instruction.
740  * The first time that this function is called the temporary will be allocated.
741  * Each call to this function will return the same temporary.
742  */
743 static struct etna_native_reg
etna_compile_get_inner_temp(struct etna_compile * c)744 etna_compile_get_inner_temp(struct etna_compile *c)
745 {
746    int inner_temp = c->inner_temps;
747 
748    if (inner_temp < ETNA_MAX_INNER_TEMPS) {
749       if (!c->inner_temp[inner_temp].valid)
750          c->inner_temp[inner_temp] = alloc_new_native_reg(c);
751 
752       /* alloc_new_native_reg() handles lack of registers */
753       c->inner_temps += 1;
754    } else {
755       BUG("Too many inner temporaries (%i) requested in one instruction",
756           inner_temp + 1);
757    }
758 
759    return c->inner_temp[inner_temp];
760 }
761 
762 static struct etna_inst_dst
etna_native_to_dst(struct etna_native_reg native,unsigned comps)763 etna_native_to_dst(struct etna_native_reg native, unsigned comps)
764 {
765    /* Can only assign to temporaries */
766    assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
767 
768    struct etna_inst_dst rv = {
769       .comps = comps,
770       .use = 1,
771       .reg = native.id,
772    };
773 
774    return rv;
775 }
776 
777 static struct etna_inst_src
etna_native_to_src(struct etna_native_reg native,uint32_t swizzle)778 etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
779 {
780    assert(native.valid && !native.is_tex);
781 
782    struct etna_inst_src rv = {
783       .use = 1,
784       .swiz = swizzle,
785       .rgroup = native.rgroup,
786       .reg = native.id,
787       .amode = INST_AMODE_DIRECT,
788    };
789 
790    return rv;
791 }
792 
793 static inline struct etna_inst_src
negate(struct etna_inst_src src)794 negate(struct etna_inst_src src)
795 {
796    src.neg = !src.neg;
797 
798    return src;
799 }
800 
801 static inline struct etna_inst_src
absolute(struct etna_inst_src src)802 absolute(struct etna_inst_src src)
803 {
804    src.abs = 1;
805 
806    return src;
807 }
808 
809 static inline struct etna_inst_src
swizzle(struct etna_inst_src src,unsigned swizzle)810 swizzle(struct etna_inst_src src, unsigned swizzle)
811 {
812    src.swiz = inst_swiz_compose(src.swiz, swizzle);
813 
814    return src;
815 }
816 
817 /* Emit instruction and append it to program */
818 static void
emit_inst(struct etna_compile * c,struct etna_inst * inst)819 emit_inst(struct etna_compile *c, struct etna_inst *inst)
820 {
821    assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
822 
823    /* Check for uniform conflicts (each instruction can only access one
824     * uniform),
825     * if detected, use an intermediate temporary */
826    unsigned uni_rgroup = -1;
827    unsigned uni_reg = -1;
828 
829    for (int src = 0; src < ETNA_NUM_SRC; ++src) {
830       if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
831          if (uni_reg == -1) { /* first unique uniform used */
832             uni_rgroup = inst->src[src].rgroup;
833             uni_reg = inst->src[src].reg;
834          } else { /* second or later; check that it is a re-use */
835             if (uni_rgroup != inst->src[src].rgroup ||
836                 uni_reg != inst->src[src].reg) {
837                DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
838                                              "accesses different uniforms, "
839                                              "need to generate extra MOV");
840                struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
841 
842                /* Generate move instruction to temporary */
843                etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
844                   .opcode = INST_OPCODE_MOV,
845                   .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
846                                                         INST_COMPS_Z | INST_COMPS_W),
847                   .src[2] = inst->src[src]
848                });
849 
850                c->inst_ptr++;
851 
852                /* Modify instruction to use temp register instead of uniform */
853                inst->src[src].use = 1;
854                inst->src[src].rgroup = INST_RGROUP_TEMP;
855                inst->src[src].reg = inner_temp.id;
856                inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
857                inst->src[src].neg = 0; /* negation happens on MOV */
858                inst->src[src].abs = 0; /* abs happens on MOV */
859                inst->src[src].amode = 0; /* amode effects happen on MOV */
860             }
861          }
862       }
863    }
864 
865    /* Finally assemble the actual instruction */
866    etna_assemble(&c->code[c->inst_ptr * 4], inst);
867    c->inst_ptr++;
868 }
869 
870 static unsigned int
etna_amode(struct tgsi_ind_register indirect)871 etna_amode(struct tgsi_ind_register indirect)
872 {
873    assert(indirect.File == TGSI_FILE_ADDRESS);
874    assert(indirect.Index == 0);
875 
876    switch (indirect.Swizzle) {
877    case TGSI_SWIZZLE_X:
878       return INST_AMODE_ADD_A_X;
879    case TGSI_SWIZZLE_Y:
880       return INST_AMODE_ADD_A_Y;
881    case TGSI_SWIZZLE_Z:
882       return INST_AMODE_ADD_A_Z;
883    case TGSI_SWIZZLE_W:
884       return INST_AMODE_ADD_A_W;
885    default:
886       assert(!"Invalid swizzle");
887    }
888 
889    unreachable("bad swizzle");
890 }
891 
892 /* convert destination operand */
893 static struct etna_inst_dst
convert_dst(struct etna_compile * c,const struct tgsi_full_dst_register * in)894 convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
895 {
896    struct etna_inst_dst rv = {
897       /// XXX .amode
898       .comps = in->Register.WriteMask,
899    };
900 
901    if (in->Register.File == TGSI_FILE_ADDRESS) {
902       assert(in->Register.Index == 0);
903       rv.reg = in->Register.Index;
904       rv.use = 0;
905    } else {
906       rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
907                               in->Register.WriteMask);
908    }
909 
910    if (in->Register.Indirect)
911       rv.amode = etna_amode(in->Indirect);
912 
913    return rv;
914 }
915 
916 /* convert texture operand */
917 static struct etna_inst_tex
convert_tex(struct etna_compile * c,const struct tgsi_full_src_register * in,const struct tgsi_instruction_texture * tex)918 convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
919             const struct tgsi_instruction_texture *tex)
920 {
921    struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
922    struct etna_inst_tex rv = {
923       // XXX .amode (to allow for an array of samplers?)
924       .swiz = INST_SWIZ_IDENTITY
925    };
926 
927    assert(native_reg.is_tex && native_reg.valid);
928    rv.id = native_reg.id;
929 
930    return rv;
931 }
932 
933 /* convert source operand */
934 static struct etna_inst_src
etna_create_src(const struct tgsi_full_src_register * tgsi,const struct etna_native_reg * native)935 etna_create_src(const struct tgsi_full_src_register *tgsi,
936                 const struct etna_native_reg *native)
937 {
938    const struct tgsi_src_register *reg = &tgsi->Register;
939    struct etna_inst_src rv = {
940       .use = 1,
941       .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
942       .neg = reg->Negate,
943       .abs = reg->Absolute,
944       .rgroup = native->rgroup,
945       .reg = native->id,
946       .amode = INST_AMODE_DIRECT,
947    };
948 
949    assert(native->valid && !native->is_tex);
950 
951    if (reg->Indirect)
952       rv.amode = etna_amode(tgsi->Indirect);
953 
954    return rv;
955 }
956 
957 static struct etna_inst_src
etna_mov_src_to_temp(struct etna_compile * c,struct etna_inst_src src,struct etna_native_reg temp)958 etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
959                      struct etna_native_reg temp)
960 {
961    struct etna_inst mov = { };
962 
963    mov.opcode = INST_OPCODE_MOV;
964    mov.sat = 0;
965    mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
966                                       INST_COMPS_Z | INST_COMPS_W);
967    mov.src[2] = src;
968    emit_inst(c, &mov);
969 
970    src.swiz = INST_SWIZ_IDENTITY;
971    src.neg = src.abs = 0;
972    src.rgroup = temp.rgroup;
973    src.reg = temp.id;
974 
975    return src;
976 }
977 
978 static struct etna_inst_src
etna_mov_src(struct etna_compile * c,struct etna_inst_src src)979 etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
980 {
981    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
982 
983    return etna_mov_src_to_temp(c, src, temp);
984 }
985 
986 static bool
etna_src_uniforms_conflict(struct etna_inst_src a,struct etna_inst_src b)987 etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
988 {
989    return etna_rgroup_is_uniform(a.rgroup) &&
990           etna_rgroup_is_uniform(b.rgroup) &&
991           (a.rgroup != b.rgroup || a.reg != b.reg);
992 }
993 
994 /* create a new label */
995 static unsigned int
alloc_new_label(struct etna_compile * c)996 alloc_new_label(struct etna_compile *c)
997 {
998    struct etna_compile_label label = {
999       .inst_idx = -1, /* start by point to no specific instruction */
1000    };
1001 
1002    array_insert(c->labels, label);
1003 
1004    return c->labels_count - 1;
1005 }
1006 
1007 /* place label at current instruction pointer */
1008 static void
label_place(struct etna_compile * c,struct etna_compile_label * label)1009 label_place(struct etna_compile *c, struct etna_compile_label *label)
1010 {
1011    label->inst_idx = c->inst_ptr;
1012 }
1013 
1014 /* mark label use at current instruction.
1015  * target of the label will be filled in in the marked instruction's src2.imm
1016  * slot as soon
1017  * as the value becomes known.
1018  */
1019 static void
label_mark_use(struct etna_compile * c,int lbl_idx)1020 label_mark_use(struct etna_compile *c, int lbl_idx)
1021 {
1022    assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
1023    c->lbl_usage[c->inst_ptr] = lbl_idx;
1024 }
1025 
1026 /* walk the frame stack and return first frame with matching type */
1027 static struct etna_compile_frame *
find_frame(struct etna_compile * c,enum etna_compile_frame_type type)1028 find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1029 {
1030    for (int sp = c->frame_sp; sp >= 0; sp--)
1031       if (c->frame_stack[sp].type == type)
1032          return &c->frame_stack[sp];
1033 
1034    assert(0);
1035    return NULL;
1036 }
1037 
/* Describes how one instruction is translated: the function that does the
 * work plus the parameters that function reads from the entry. */
struct instr_translater {
   /* Translation function; receives this entry, the compile state, the TGSI
    * instruction and an array of source operands. */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   /* presumably the TGSI opcode this entry applies to -- not read by the
    * translation functions visible here, confirm against the table user */
   unsigned tgsi_opc;
   /* native opcode stored into the emitted instruction (used by trans_instr
    * and trans_deriv) */
   uint8_t opc;

   /* tgsi src -> etna src swizzle: src[i] is the index of the native source
    * slot that receives TGSI source i; trans_instr asserts it is not -1 for
    * every source the opcode has */
   int src[3];

   /* condition code stored into the emitted instruction */
   unsigned cond;
};
1050 
1051 static void
trans_instr(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1052 trans_instr(const struct instr_translater *t, struct etna_compile *c,
1053             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1054 {
1055    const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
1056    struct etna_inst instr = { };
1057 
1058    instr.opcode = t->opc;
1059    instr.cond = t->cond;
1060    instr.sat = inst->Instruction.Saturate;
1061 
1062    assert(info->num_dst <= 1);
1063    if (info->num_dst)
1064       instr.dst = convert_dst(c, &inst->Dst[0]);
1065 
1066    assert(info->num_src <= ETNA_NUM_SRC);
1067 
1068    for (unsigned i = 0; i < info->num_src; i++) {
1069       int swizzle = t->src[i];
1070 
1071       assert(swizzle != -1);
1072       instr.src[swizzle] = src[i];
1073    }
1074 
1075    emit_inst(c, &instr);
1076 }
1077 
1078 static void
trans_min_max(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1079 trans_min_max(const struct instr_translater *t, struct etna_compile *c,
1080               const struct tgsi_full_instruction *inst,
1081               struct etna_inst_src *src)
1082 {
1083    emit_inst(c, &(struct etna_inst) {
1084       .opcode = INST_OPCODE_SELECT,
1085        .cond = t->cond,
1086        .sat = inst->Instruction.Saturate,
1087        .dst = convert_dst(c, &inst->Dst[0]),
1088        .src[0] = src[0],
1089        .src[1] = src[1],
1090        .src[2] = src[0],
1091     });
1092 }
1093 
1094 static void
trans_if(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1095 trans_if(const struct instr_translater *t, struct etna_compile *c,
1096          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1097 {
1098    struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1099    struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);
1100 
1101    /* push IF to stack */
1102    f->type = ETNA_COMPILE_FRAME_IF;
1103    /* create "else" label */
1104    f->lbl_else_idx = alloc_new_label(c);
1105    f->lbl_endif_idx = -1;
1106 
1107    /* We need to avoid the emit_inst() below becoming two instructions */
1108    if (etna_src_uniforms_conflict(src[0], imm_0))
1109       src[0] = etna_mov_src(c, src[0]);
1110 
1111    /* mark position in instruction stream of label reference so that it can be
1112     * filled in in next pass */
1113    label_mark_use(c, f->lbl_else_idx);
1114 
1115    /* create conditional branch to label if src0 EQ 0 */
1116    emit_inst(c, &(struct etna_inst){
1117       .opcode = INST_OPCODE_BRANCH,
1118       .cond = INST_CONDITION_EQ,
1119       .src[0] = src[0],
1120       .src[1] = imm_0,
1121     /* imm is filled in later */
1122    });
1123 }
1124 
1125 static void
trans_else(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1126 trans_else(const struct instr_translater *t, struct etna_compile *c,
1127            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1128 {
1129    assert(c->frame_sp > 0);
1130    struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
1131    assert(f->type == ETNA_COMPILE_FRAME_IF);
1132 
1133    /* create "endif" label, and branch to endif label */
1134    f->lbl_endif_idx = alloc_new_label(c);
1135    label_mark_use(c, f->lbl_endif_idx);
1136    emit_inst(c, &(struct etna_inst) {
1137       .opcode = INST_OPCODE_BRANCH,
1138       .cond = INST_CONDITION_TRUE,
1139       /* imm is filled in later */
1140    });
1141 
1142    /* mark "else" label at this position in instruction stream */
1143    label_place(c, &c->labels[f->lbl_else_idx]);
1144 }
1145 
1146 static void
trans_endif(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1147 trans_endif(const struct instr_translater *t, struct etna_compile *c,
1148             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1149 {
1150    assert(c->frame_sp > 0);
1151    struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1152    assert(f->type == ETNA_COMPILE_FRAME_IF);
1153 
1154    /* assign "endif" or "else" (if no ELSE) label to current position in
1155     * instruction stream, pop IF */
1156    if (f->lbl_endif_idx != -1)
1157       label_place(c, &c->labels[f->lbl_endif_idx]);
1158    else
1159       label_place(c, &c->labels[f->lbl_else_idx]);
1160 }
1161 
1162 static void
trans_loop_bgn(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1163 trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1164                const struct tgsi_full_instruction *inst,
1165                struct etna_inst_src *src)
1166 {
1167    struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1168 
1169    /* push LOOP to stack */
1170    f->type = ETNA_COMPILE_FRAME_LOOP;
1171    f->lbl_loop_bgn_idx = alloc_new_label(c);
1172    f->lbl_loop_end_idx = alloc_new_label(c);
1173 
1174    label_place(c, &c->labels[f->lbl_loop_bgn_idx]);
1175 
1176    c->num_loops++;
1177 }
1178 
1179 static void
trans_loop_end(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1180 trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
1181                const struct tgsi_full_instruction *inst,
1182                struct etna_inst_src *src)
1183 {
1184    assert(c->frame_sp > 0);
1185    struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1186    assert(f->type == ETNA_COMPILE_FRAME_LOOP);
1187 
1188    /* mark position in instruction stream of label reference so that it can be
1189     * filled in in next pass */
1190    label_mark_use(c, f->lbl_loop_bgn_idx);
1191 
1192    /* create branch to loop_bgn label */
1193    emit_inst(c, &(struct etna_inst) {
1194       .opcode = INST_OPCODE_BRANCH,
1195       .cond = INST_CONDITION_TRUE,
1196       .src[0] = src[0],
1197       /* imm is filled in later */
1198    });
1199 
1200    label_place(c, &c->labels[f->lbl_loop_end_idx]);
1201 }
1202 
1203 static void
trans_brk(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1204 trans_brk(const struct instr_translater *t, struct etna_compile *c,
1205           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1206 {
1207    assert(c->frame_sp > 0);
1208    struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1209 
1210    /* mark position in instruction stream of label reference so that it can be
1211     * filled in in next pass */
1212    label_mark_use(c, f->lbl_loop_end_idx);
1213 
1214    /* create branch to loop_end label */
1215    emit_inst(c, &(struct etna_inst) {
1216       .opcode = INST_OPCODE_BRANCH,
1217       .cond = INST_CONDITION_TRUE,
1218       .src[0] = src[0],
1219       /* imm is filled in later */
1220    });
1221 }
1222 
1223 static void
trans_cont(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1224 trans_cont(const struct instr_translater *t, struct etna_compile *c,
1225            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1226 {
1227    assert(c->frame_sp > 0);
1228    struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1229 
1230    /* mark position in instruction stream of label reference so that it can be
1231     * filled in in next pass */
1232    label_mark_use(c, f->lbl_loop_bgn_idx);
1233 
1234    /* create branch to loop_end label */
1235    emit_inst(c, &(struct etna_inst) {
1236       .opcode = INST_OPCODE_BRANCH,
1237       .cond = INST_CONDITION_TRUE,
1238       .src[0] = src[0],
1239       /* imm is filled in later */
1240    });
1241 }
1242 
1243 static void
trans_deriv(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1244 trans_deriv(const struct instr_translater *t, struct etna_compile *c,
1245             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1246 {
1247    emit_inst(c, &(struct etna_inst) {
1248       .opcode = t->opc,
1249       .sat = inst->Instruction.Saturate,
1250       .dst = convert_dst(c, &inst->Dst[0]),
1251       .src[0] = src[0],
1252       .src[2] = src[0],
1253    });
1254 }
1255 
1256 static void
trans_arl(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1257 trans_arl(const struct instr_translater *t, struct etna_compile *c,
1258           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1259 {
1260    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1261    struct etna_inst arl = { };
1262    struct etna_inst_dst dst;
1263 
1264    dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
1265                                   INST_COMPS_W);
1266 
1267    if (c->specs->has_sign_floor_ceil) {
1268       struct etna_inst floor = { };
1269 
1270       floor.opcode = INST_OPCODE_FLOOR;
1271       floor.src[2] = src[0];
1272       floor.dst = dst;
1273 
1274       emit_inst(c, &floor);
1275    } else {
1276       struct etna_inst floor[2] = { };
1277 
1278       floor[0].opcode = INST_OPCODE_FRC;
1279       floor[0].sat = inst->Instruction.Saturate;
1280       floor[0].dst = dst;
1281       floor[0].src[2] = src[0];
1282 
1283       floor[1].opcode = INST_OPCODE_ADD;
1284       floor[1].sat = inst->Instruction.Saturate;
1285       floor[1].dst = dst;
1286       floor[1].src[0] = src[0];
1287       floor[1].src[2].use = 1;
1288       floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
1289       floor[1].src[2].neg = 1;
1290       floor[1].src[2].rgroup = temp.rgroup;
1291       floor[1].src[2].reg = temp.id;
1292 
1293       emit_inst(c, &floor[0]);
1294       emit_inst(c, &floor[1]);
1295    }
1296 
1297    arl.opcode = INST_OPCODE_MOVAR;
1298    arl.sat = inst->Instruction.Saturate;
1299    arl.dst = convert_dst(c, &inst->Dst[0]);
1300    arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1301 
1302    emit_inst(c, &arl);
1303 }
1304 
1305 static void
trans_lrp(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1306 trans_lrp(const struct instr_translater *t, struct etna_compile *c,
1307           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1308 {
1309    /* dst = src0 * src1 + (1 - src0) * src2
1310     *     => src0 * src1 - (src0 - 1) * src2
1311     *     => src0 * src1 - (src0 * src2 - src2)
1312     * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1313     * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1314     */
1315    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1316    if (etna_src_uniforms_conflict(src[0], src[1]) ||
1317        etna_src_uniforms_conflict(src[0], src[2])) {
1318       src[0] = etna_mov_src(c, src[0]);
1319    }
1320 
1321    struct etna_inst mad[2] = { };
1322    mad[0].opcode = INST_OPCODE_MAD;
1323    mad[0].sat = 0;
1324    mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1325                                          INST_COMPS_Z | INST_COMPS_W);
1326    mad[0].src[0] = src[0];
1327    mad[0].src[1] = src[2];
1328    mad[0].src[2] = negate(src[2]);
1329    mad[1].opcode = INST_OPCODE_MAD;
1330    mad[1].sat = inst->Instruction.Saturate;
1331    mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1332    mad[1].src[1] = src[1];
1333    mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1334 
1335    emit_inst(c, &mad[0]);
1336    emit_inst(c, &mad[1]);
1337 }
1338 
/* Translate LIT into a SELECT / LOG / MUL / LITP sequence that works on one
 * inner temporary.
 *
 * src0.y and src0.w are clamped first (with SELECTs when src0 is not a
 * uniform, or by picking a replacement immediate when it is), then
 * LOG(y) * w is computed into tmp.x and handed to LITP together with src0.x.
 */
static void
trans_lit(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* SELECT.LT tmp._y__, 0, src.yyyy, 0
    *  - can be eliminated if src.y is a uniform and >= 0
    * SELECT.GT tmp.___w, 128, src.wwww, 128
    * SELECT.LT tmp.___w, -128, tmp.wwww, -128
    *  - can be eliminated if src.w is a uniform and fits clamp
    * LOG tmp.x, void, void, tmp.yyyy
    * MUL tmp.x, tmp.xxxx, tmp.wwww, void
    * LITP dst, undef, src.xxxx, tmp.xxxx
    */
   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
   struct etna_inst_src src_y = { };

   /* src_y: the y input of the LOG. Non-uniform sources go through a SELECT
    * into tmp.y; for uniform sources the value is read via get_imm_u32() and
    * either replaced by the immediate 0 (when negative) or used directly. */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_LT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
      src_y = alloc_imm_f32(c, 0.0);
   else
      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));

   struct etna_inst_src src_w = { };

   /* src_w: the w input of the final MUL, limited to [-128, 128]. Non-uniform
    * sources use two SELECTs on tmp.w; uniform sources pick the immediate
    * -128 or 128 when the value lies outside that range. */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_GT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
      emit_inst(c, &ins);
      /* reuse the same instruction for the lower bound: flip the condition,
       * negate the 128 immediates and read back the value just written */
      ins.cond = INST_CONDITION_LT;
      ins.src[0].neg = !ins.src[0].neg;
      ins.src[2].neg = !ins.src[2].neg;
      ins.src[1] = src_w;
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
      src_w = alloc_imm_f32(c, -128.);
   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
      src_w = alloc_imm_f32(c, 128.);
   else
      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));

   if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
      /* this LOG variant writes tmp.xy; the extra MUL below combines the two
       * components into tmp.x */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src_y,
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
         .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      /* NOTE(review): only ins[0] of this array is used */
      struct etna_inst ins[3] = { };
      ins[0].opcode = INST_OPCODE_LOG;
      ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
      ins[0].src[2] = src_y;

      emit_inst(c, &ins[0]);
   }
   /* tmp.x = tmp.x * src_w */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   /* src.xxxx is passed in both slot 0 and slot 1 here, although the
    * sequence comment at the top lists slot 0 as undef */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}
1431 
1432 static void
trans_ssg(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1433 trans_ssg(const struct instr_translater *t, struct etna_compile *c,
1434           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1435 {
1436    if (c->specs->has_sign_floor_ceil) {
1437       emit_inst(c, &(struct etna_inst){
1438          .opcode = INST_OPCODE_SIGN,
1439          .sat = inst->Instruction.Saturate,
1440          .dst = convert_dst(c, &inst->Dst[0]),
1441          .src[2] = src[0],
1442       });
1443    } else {
1444       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1445       struct etna_inst ins[2] = { };
1446 
1447       ins[0].opcode = INST_OPCODE_SET;
1448       ins[0].cond = INST_CONDITION_NZ;
1449       ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1450                                             INST_COMPS_Z | INST_COMPS_W);
1451       ins[0].src[0] = src[0];
1452 
1453       ins[1].opcode = INST_OPCODE_SELECT;
1454       ins[1].cond = INST_CONDITION_LZ;
1455       ins[1].sat = inst->Instruction.Saturate;
1456       ins[1].dst = convert_dst(c, &inst->Dst[0]);
1457       ins[1].src[0] = src[0];
1458       ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1459       ins[1].src[1] = negate(ins[1].src[2]);
1460 
1461       emit_inst(c, &ins[0]);
1462       emit_inst(c, &ins[1]);
1463    }
1464 }
1465 
/* Translate SIN/COS, using one of three code paths depending on the chip:
 *
 *  - has_new_transcendentals: scale by 1/pi, run the native SIN/COS into
 *    temp.xy and multiply the two components into the destination.
 *  - has_sin_cos_sqrt: scale by 2/pi into a temporary and run the native
 *    SIN/COS on it.
 *  - otherwise: a seven-instruction approximation built from MAD/FRC/MUL/DP3.
 *
 * In the first two paths the native opcode is COS when the TGSI opcode is
 * TGSI_OPCODE_COS and SIN for anything else; the fallback selects the sine
 * swizzles when the TGSI opcode is TGSI_OPCODE_SIN and the cosine swizzles
 * for anything else.
 */
static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
      /* On newer chips alternative SIN/COS instructions are implemented,
       * which:
       * - Need their input scaled by 1/pi instead of 2/pi
       * - Output an x and y component, which need to be multiplied to
       *   get the result
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
      /* temp.z = src0 * (1/pi) */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
      });
      /* temp.xy = SIN/COS(temp.z) */
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      /* dst = temp.x * temp.y */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });

   } else if (c->specs->has_sin_cos_sqrt) {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       *  MAD t.x_zw, src.xxxx, A, B
       *  FRC t.x_z_, void, void, t.xwzw
       *  MAD t.x_z_, t.xwzw, 2, -1
       *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
       *  DP3 t.x_z_, t.zyww, C, void         (for sin)
       *  DP3 t.__z_, t.zyww, C, void         (for scs)
       *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
       *  DP3 t.x_z_, t.xyww, C, void         (for cos)
       *  DP3 t.x___, t.xyww, C, void         (for scs)
       *  MAD t._y_w, t.xxzz, |t.xxzz|, -t.xxzz
       *  MAD dst, t.ywyw, .2225, t.xzxz
       */
      /* Only ins[0] .. ins[6] are filled in and emitted; p ends up pointing
       * at the last one. Likewise only sincos[0] and sincos[1] are used. */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded.  Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         /* note: this local shadows the ins[] array for the scope of the
          * block; the MOV copies the source into t0.x */
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* pick the operand swizzles of the MUL and DP3 below: sine variant for
       * TGSI_OPCODE_SIN, cosine variant for everything else */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      p = &ins[5];
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      /* final MAD (ins[6]) writes the real destination and is the only
       * instruction of this path that carries the saturate flag */
      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      /* emit everything from ins[0] up to and including *p, in order */
      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}
1611 
1612 static void
trans_lg2(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1613 trans_lg2(const struct instr_translater *t, struct etna_compile *c,
1614             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1615 {
1616    if (c->specs->has_new_transcendentals) {
1617       /* On newer chips alternative LOG instruction is implemented,
1618        * which outputs an x and y component, which need to be multiplied to
1619        * get the result.
1620        */
1621       struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
1622       emit_inst(c, &(struct etna_inst) {
1623          .opcode = INST_OPCODE_LOG,
1624          .sat = 0,
1625          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1626          .src[2] = src[0],
1627          .tex = { .amode=1 }, /* Unknown bit needs to be set */
1628       });
1629       emit_inst(c, &(struct etna_inst) {
1630          .opcode = INST_OPCODE_MUL,
1631          .sat = inst->Instruction.Saturate,
1632          .dst = convert_dst(c, &inst->Dst[0]),
1633          .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1634          .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1635       });
1636    } else {
1637       emit_inst(c, &(struct etna_inst) {
1638          .opcode = INST_OPCODE_LOG,
1639          .sat = inst->Instruction.Saturate,
1640          .dst = convert_dst(c, &inst->Dst[0]),
1641          .src[2] = src[0],
1642       });
1643    }
1644 }
1645 
1646 static void
trans_sampler(const struct instr_translater * t,struct etna_compile * c,const struct tgsi_full_instruction * inst,struct etna_inst_src * src)1647 trans_sampler(const struct instr_translater *t, struct etna_compile *c,
1648               const struct tgsi_full_instruction *inst,
1649               struct etna_inst_src *src)
1650 {
1651    /* There is no native support for GL texture rectangle coordinates, so
1652     * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
1653    if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
1654       uint32_t unit = inst->Src[1].Register.Index;
1655       struct etna_inst ins[2] = { };
1656       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1657 
1658       ins[0].opcode = INST_OPCODE_MUL;
1659       ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
1660       ins[0].src[0] = src[0];
1661       ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);
1662 
1663       ins[1].opcode = INST_OPCODE_MUL;
1664       ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
1665       ins[1].src[0] = src[0];
1666       ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);
1667 
1668       emit_inst(c, &ins[0]);
1669       emit_inst(c, &ins[1]);
1670 
1671       src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
1672    }
1673 
1674    switch (inst->Instruction.Opcode) {
1675    case TGSI_OPCODE_TEX:
1676       emit_inst(c, &(struct etna_inst) {
1677          .opcode = INST_OPCODE_TEXLD,
1678          .sat = 0,
1679          .dst = convert_dst(c, &inst->Dst[0]),
1680          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1681          .src[0] = src[0],
1682       });
1683       break;
1684 
1685    case TGSI_OPCODE_TXB:
1686       emit_inst(c, &(struct etna_inst) {
1687          .opcode = INST_OPCODE_TEXLDB,
1688          .sat = 0,
1689          .dst = convert_dst(c, &inst->Dst[0]),
1690          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1691          .src[0] = src[0],
1692       });
1693       break;
1694 
1695    case TGSI_OPCODE_TXL:
1696       emit_inst(c, &(struct etna_inst) {
1697          .opcode = INST_OPCODE_TEXLDL,
1698          .sat = 0,
1699          .dst = convert_dst(c, &inst->Dst[0]),
1700          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1701          .src[0] = src[0],
1702       });
1703       break;
1704 
1705    case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
1706       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1707 
1708       emit_inst(c, &(struct etna_inst) {
1709          .opcode = INST_OPCODE_RCP,
1710          .sat = 0,
1711          .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
1712          .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
1713       });
1714       emit_inst(c, &(struct etna_inst) {
1715          .opcode = INST_OPCODE_MUL,
1716          .sat = 0,
1717          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1718                                          INST_COMPS_Z), /* tmp.xyz */
1719          .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
1720          .src[1] = src[0], /* src.xyzw */
1721       });
1722       emit_inst(c, &(struct etna_inst) {
1723          .opcode = INST_OPCODE_TEXLD,
1724          .sat = 0,
1725          .dst = convert_dst(c, &inst->Dst[0]),
1726          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1727          .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
1728       });
1729    } break;
1730 
1731    default:
1732       BUG("Unhandled instruction %s",
1733           tgsi_get_opcode_name(inst->Instruction.Opcode));
1734       assert(0);
1735       break;
1736    }
1737 }
1738 
/* Translator for TGSI opcodes that need no native code at all; it emits
 * nothing and leaves every argument untouched.
 */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* intentionally empty */
   (void)t;
   (void)c;
   (void)inst;
   (void)src;
}
1745 
1746 static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
1747 #define INSTR(n, f, ...) \
1748    [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}
1749 
1750    INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
1751    INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
1752    INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
1753    INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
1754    INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
1755    INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
1756    INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
1757    INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
1758    INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
1759    INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
1760    INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
1761    INSTR(LG2, trans_lg2),
1762    INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
1763    INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
1764    INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
1765    INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
1766    INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),
1767 
1768    INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
1769    INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),
1770 
1771    INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
1772    INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),
1773 
1774    INSTR(IF, trans_if),
1775    INSTR(ELSE, trans_else),
1776    INSTR(ENDIF, trans_endif),
1777 
1778    INSTR(BGNLOOP, trans_loop_bgn),
1779    INSTR(ENDLOOP, trans_loop_end),
1780    INSTR(BRK, trans_brk),
1781    INSTR(CONT, trans_cont),
1782 
1783    INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
1784    INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),
1785 
1786    INSTR(ARL, trans_arl),
1787    INSTR(LRP, trans_lrp),
1788    INSTR(LIT, trans_lit),
1789    INSTR(SSG, trans_ssg),
1790 
1791    INSTR(SIN, trans_trig),
1792    INSTR(COS, trans_trig),
1793 
1794    INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
1795    INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
1796    INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
1797    INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
1798    INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
1799    INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),
1800 
1801    INSTR(TEX, trans_sampler),
1802    INSTR(TXB, trans_sampler),
1803    INSTR(TXL, trans_sampler),
1804    INSTR(TXP, trans_sampler),
1805 
1806    INSTR(NOP, trans_dummy),
1807    INSTR(END, trans_dummy),
1808 };
1809 
1810 /* Pass -- compile instructions */
1811 static void
etna_compile_pass_generate_code(struct etna_compile * c)1812 etna_compile_pass_generate_code(struct etna_compile *c)
1813 {
1814    struct tgsi_parse_context ctx = { };
1815    unsigned status = tgsi_parse_init(&ctx, c->tokens);
1816    assert(status == TGSI_PARSE_OK);
1817 
1818    int inst_idx = 0;
1819    while (!tgsi_parse_end_of_tokens(&ctx)) {
1820       const struct tgsi_full_instruction *inst = 0;
1821 
1822       /* No inner temps used yet for this instruction, clear counter */
1823       c->inner_temps = 0;
1824 
1825       tgsi_parse_token(&ctx);
1826 
1827       switch (ctx.FullToken.Token.Type) {
1828       case TGSI_TOKEN_TYPE_INSTRUCTION:
1829          /* iterate over operands */
1830          inst = &ctx.FullToken.FullInstruction;
1831          if (c->dead_inst[inst_idx]) { /* skip dead instructions */
1832             inst_idx++;
1833             continue;
1834          }
1835 
1836          /* Lookup the TGSI information and generate the source arguments */
1837          struct etna_inst_src src[ETNA_NUM_SRC];
1838          memset(src, 0, sizeof(src));
1839 
1840          const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
1841 
1842          for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
1843             const struct tgsi_full_src_register *reg = &inst->Src[i];
1844             const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;
1845 
1846             if (!n->valid || n->is_tex)
1847                continue;
1848 
1849             src[i] = etna_create_src(reg, n);
1850          }
1851 
1852          const unsigned opc = inst->Instruction.Opcode;
1853          const struct instr_translater *t = &translaters[opc];
1854 
1855          if (t->fxn) {
1856             t->fxn(t, c, inst, src);
1857 
1858             inst_idx += 1;
1859          } else {
1860             BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
1861             assert(0);
1862          }
1863          break;
1864       }
1865    }
1866    tgsi_parse_free(&ctx);
1867 }
1868 
1869 /* Look up register by semantic */
1870 static struct etna_reg_desc *
find_decl_by_semantic(struct etna_compile * c,uint file,uint name,uint index)1871 find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1872 {
1873    for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1874       struct etna_reg_desc *reg = &c->file[file].reg[idx];
1875 
1876       if (reg->semantic.Name == name && reg->semantic.Index == index)
1877          return reg;
1878    }
1879 
1880    return NULL; /* not found */
1881 }
1882 
1883 /** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
1884  * - this is a vertex shader
1885  * - and this is an older GPU
1886  */
1887 static void
etna_compile_add_z_div_if_needed(struct etna_compile * c)1888 etna_compile_add_z_div_if_needed(struct etna_compile *c)
1889 {
1890    if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
1891       /* find position out */
1892       struct etna_reg_desc *pos_reg =
1893          find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
1894 
1895       if (pos_reg != NULL) {
1896          /*
1897           * ADD tX.__z_, tX.zzzz, void, tX.wwww
1898           * MUL tX.__z_, tX.zzzz, 0.5, void
1899          */
1900          emit_inst(c, &(struct etna_inst) {
1901             .opcode = INST_OPCODE_ADD,
1902             .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1903             .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1904             .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
1905          });
1906          emit_inst(c, &(struct etna_inst) {
1907             .opcode = INST_OPCODE_MUL,
1908             .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1909             .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1910             .src[1] = alloc_imm_f32(c, 0.5f),
1911          });
1912       }
1913    }
1914 }
1915 
1916 static void
etna_compile_frag_rb_swap(struct etna_compile * c)1917 etna_compile_frag_rb_swap(struct etna_compile *c)
1918 {
1919    if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
1920       /* find color out */
1921       struct etna_reg_desc *color_reg =
1922          find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);
1923 
1924       emit_inst(c, &(struct etna_inst) {
1925          .opcode = INST_OPCODE_MOV,
1926          .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
1927          .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
1928       });
1929    }
1930 }
1931 
1932 /** add a NOP to the shader if
1933  * a) the shader is empty
1934  * or
1935  * b) there is a label at the end of the shader
1936  */
1937 static void
etna_compile_add_nop_if_needed(struct etna_compile * c)1938 etna_compile_add_nop_if_needed(struct etna_compile *c)
1939 {
1940    bool label_at_last_inst = false;
1941 
1942    for (int idx = 0; idx < c->labels_count; ++idx) {
1943       if (c->labels[idx].inst_idx == c->inst_ptr)
1944          label_at_last_inst = true;
1945 
1946    }
1947 
1948    if (c->inst_ptr == 0 || label_at_last_inst)
1949       emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
1950 }
1951 
1952 static void
assign_uniforms(struct etna_compile_file * file,unsigned base)1953 assign_uniforms(struct etna_compile_file *file, unsigned base)
1954 {
1955    for (int idx = 0; idx < file->reg_size; ++idx) {
1956       file->reg[idx].native.valid = 1;
1957       file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
1958       file->reg[idx].native.id = base + idx;
1959    }
1960 }
1961 
1962 /* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
1963  * CONST must be consecutive as const buffers are supposed to be consecutive,
1964  * and before IMM, as this is
1965  * more convenient because is possible for the compilation process itself to
1966  * generate extra
1967  * immediates for constants such as pi, one, zero.
1968  */
1969 static void
assign_constants_and_immediates(struct etna_compile * c)1970 assign_constants_and_immediates(struct etna_compile *c)
1971 {
1972    assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
1973    /* immediates start after the constants */
1974    c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
1975    assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
1976    DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
1977          c->imm_size);
1978 }
1979 
1980 /* Assign declared samplers to native texture units */
1981 static void
assign_texture_units(struct etna_compile * c)1982 assign_texture_units(struct etna_compile *c)
1983 {
1984    uint tex_base = 0;
1985 
1986    if (c->info.processor == PIPE_SHADER_VERTEX)
1987       tex_base = c->specs->vertex_sampler_offset;
1988 
1989    for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
1990       c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
1991       c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
1992       c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
1993    }
1994 }
1995 
1996 /* Additional pass to fill in branch targets. This pass should be last
1997  * as no instruction reordering or removing/addition can be done anymore
1998  * once the branch targets are computed.
1999  */
2000 static void
etna_compile_fill_in_labels(struct etna_compile * c)2001 etna_compile_fill_in_labels(struct etna_compile *c)
2002 {
2003    for (int idx = 0; idx < c->inst_ptr; ++idx) {
2004       if (c->lbl_usage[idx] != -1)
2005          etna_assemble_set_imm(&c->code[idx * 4],
2006                                c->labels[c->lbl_usage[idx]].inst_idx);
2007    }
2008 }
2009 
2010 /* compare two etna_native_reg structures, return true if equal */
2011 static bool
cmp_etna_native_reg(const struct etna_native_reg to,const struct etna_native_reg from)2012 cmp_etna_native_reg(const struct etna_native_reg to,
2013                     const struct etna_native_reg from)
2014 {
2015    return to.valid == from.valid && to.is_tex == from.is_tex &&
2016           to.rgroup == from.rgroup && to.id == from.id;
2017 }
2018 
2019 /* go through all declarations and swap native registers *to* and *from* */
2020 static void
swap_native_registers(struct etna_compile * c,const struct etna_native_reg to,const struct etna_native_reg from)2021 swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
2022                       const struct etna_native_reg from)
2023 {
2024    if (cmp_etna_native_reg(from, to))
2025       return; /* Nothing to do */
2026 
2027    for (int idx = 0; idx < c->total_decls; ++idx) {
2028       if (cmp_etna_native_reg(c->decl[idx].native, from)) {
2029          c->decl[idx].native = to;
2030       } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
2031          c->decl[idx].native = from;
2032       }
2033    }
2034 }
2035 
2036 /* For PS we need to permute so that inputs are always in temporary 0..N-1.
2037  * Semantic POS is always t0. If that semantic is not used, avoid t0.
2038  */
2039 static void
permute_ps_inputs(struct etna_compile * c)2040 permute_ps_inputs(struct etna_compile *c)
2041 {
2042    /* Special inputs:
2043     * gl_FragCoord  VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
2044     * gl_PointCoord VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
2045     */
2046    uint native_idx = 1;
2047 
2048    for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2049       struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2050       uint input_id;
2051       assert(reg->has_semantic);
2052 
2053       if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
2054          continue;
2055 
2056       input_id = native_idx++;
2057       swap_native_registers(c, etna_native_temp(input_id),
2058                             c->file[TGSI_FILE_INPUT].reg[idx].native);
2059    }
2060 
2061    c->num_varyings = native_idx - 1;
2062 
2063    if (native_idx > c->next_free_native)
2064       c->next_free_native = native_idx;
2065 }
2066 
2067 /* fill in ps inputs into shader object */
2068 static void
fill_in_ps_inputs(struct etna_shader_variant * sobj,struct etna_compile * c)2069 fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2070 {
2071    struct etna_shader_io_file *sf = &sobj->infile;
2072 
2073    sf->num_reg = 0;
2074 
2075    for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2076       struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2077 
2078       if (reg->native.id > 0) {
2079          assert(sf->num_reg < ETNA_NUM_INPUTS);
2080          sf->reg[sf->num_reg].reg = reg->native.id;
2081          sf->reg[sf->num_reg].semantic = reg->semantic;
2082          /* convert usage mask to number of components (*=wildcard)
2083           *   .r    (0..1)  -> 1 component
2084           *   .*g   (2..3)  -> 2 component
2085           *   .**b  (4..7)  -> 3 components
2086           *   .***a (8..15) -> 4 components
2087           */
2088          sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2089          sf->num_reg++;
2090       }
2091    }
2092 
2093    assert(sf->num_reg == c->num_varyings);
2094    sobj->input_count_unk8 = 31; /* XXX what is this */
2095 }
2096 
2097 /* fill in output mapping for ps into shader object */
2098 static void
fill_in_ps_outputs(struct etna_shader_variant * sobj,struct etna_compile * c)2099 fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2100 {
2101    sobj->outfile.num_reg = 0;
2102 
2103    for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2104       struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2105 
2106       switch (reg->semantic.Name) {
2107       case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2108          sobj->ps_color_out_reg = reg->native.id;
2109          break;
2110       case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2111          sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2112          break;
2113       default:
2114          assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2115       }
2116    }
2117 }
2118 
2119 /* fill in inputs for vs into shader object */
2120 static void
fill_in_vs_inputs(struct etna_shader_variant * sobj,struct etna_compile * c)2121 fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2122 {
2123    struct etna_shader_io_file *sf = &sobj->infile;
2124 
2125    sf->num_reg = 0;
2126    for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2127       struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2128       assert(sf->num_reg < ETNA_NUM_INPUTS);
2129 
2130       if (!reg->native.valid)
2131          continue;
2132 
2133       /* XXX exclude inputs with special semantics such as gl_frontFacing */
2134       sf->reg[sf->num_reg].reg = reg->native.id;
2135       sf->reg[sf->num_reg].semantic = reg->semantic;
2136       sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2137       sf->num_reg++;
2138    }
2139 
2140    sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
2141 }
2142 
2143 /* build two-level output index [Semantic][Index] for fast linking */
2144 static void
build_output_index(struct etna_shader_variant * sobj)2145 build_output_index(struct etna_shader_variant *sobj)
2146 {
2147    int total = 0;
2148    int offset = 0;
2149 
2150    for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
2151       total += sobj->output_count_per_semantic[name];
2152 
2153    sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));
2154 
2155    for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
2156       sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
2157       offset += sobj->output_count_per_semantic[name];
2158    }
2159 
2160    for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
2161       sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
2162                                [sobj->outfile.reg[idx].semantic.Index] =
2163          &sobj->outfile.reg[idx];
2164    }
2165 }
2166 
2167 /* fill in outputs for vs into shader object */
2168 static void
fill_in_vs_outputs(struct etna_shader_variant * sobj,struct etna_compile * c)2169 fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2170 {
2171    struct etna_shader_io_file *sf = &sobj->outfile;
2172 
2173    sf->num_reg = 0;
2174    for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2175       struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2176       assert(sf->num_reg < ETNA_NUM_INPUTS);
2177 
2178       switch (reg->semantic.Name) {
2179       case TGSI_SEMANTIC_POSITION:
2180          sobj->vs_pos_out_reg = reg->native.id;
2181          break;
2182       case TGSI_SEMANTIC_PSIZE:
2183          sobj->vs_pointsize_out_reg = reg->native.id;
2184          break;
2185       default:
2186          sf->reg[sf->num_reg].reg = reg->native.id;
2187          sf->reg[sf->num_reg].semantic = reg->semantic;
2188          sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
2189          sf->num_reg++;
2190          sobj->output_count_per_semantic[reg->semantic.Name] =
2191             MAX2(reg->semantic.Index + 1,
2192                  sobj->output_count_per_semantic[reg->semantic.Name]);
2193       }
2194    }
2195 
2196    /* build two-level index for linking */
2197    build_output_index(sobj);
2198 
2199    /* fill in "mystery meat" load balancing value. This value determines how
2200     * work is scheduled between VS and PS
2201     * in the unified shader architecture. More precisely, it is determined from
2202     * the number of VS outputs, as well as chip-specific
2203     * vertex output buffer size, vertex cache size, and the number of shader
2204     * cores.
2205     *
2206     * XXX this is a conservative estimate, the "optimal" value is only known for
2207     * sure at link time because some
2208     * outputs may be unused and thus unmapped. Then again, in the general use
2209     * case with GLSL the vertex and fragment
2210     * shaders are linked already before submitting to Gallium, thus all outputs
2211     * are used.
2212     */
2213    int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
2214    assert(half_out);
2215 
2216    uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
2217                            2 * half_out * c->specs->vertex_cache_size)) +
2218                  9) /
2219                 10;
2220    uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
2221    sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
2222                              VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
2223                              VIVS_VS_LOAD_BALANCING_C(0x3f) |
2224                              VIVS_VS_LOAD_BALANCING_D(0x0f);
2225 }
2226 
2227 static bool
etna_compile_check_limits(struct etna_compile * c)2228 etna_compile_check_limits(struct etna_compile *c)
2229 {
2230    int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
2231                          ? c->specs->max_vs_uniforms
2232                          : c->specs->max_ps_uniforms;
2233    /* round up number of uniforms, including immediates, in units of four */
2234    int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
2235 
2236    if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
2237       DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
2238           c->specs->max_instructions);
2239       return false;
2240    }
2241 
2242    if (c->next_free_native > c->specs->max_registers) {
2243       DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
2244           c->specs->max_registers);
2245       return false;
2246    }
2247 
2248    if (num_uniforms > max_uniforms) {
2249       DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
2250           max_uniforms);
2251       return false;
2252    }
2253 
2254    if (c->num_varyings > c->specs->max_varyings) {
2255       DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
2256           c->specs->max_varyings);
2257       return false;
2258    }
2259 
2260    if (c->imm_base > c->specs->num_constants) {
2261       DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
2262           c->specs->num_constants);
2263    }
2264 
2265    return true;
2266 }
2267 
2268 static void
copy_uniform_state_to_shader(struct etna_compile * c,struct etna_shader_variant * sobj)2269 copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
2270 {
2271    uint32_t count = c->imm_size;
2272    struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
2273 
2274    uinfo->const_count = c->imm_base;
2275    uinfo->imm_count = count;
2276    uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data));
2277    uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents));
2278 
2279    etna_set_shader_uniforms_dirty_flags(sobj);
2280 }
2281 
2282 bool
etna_compile_shader(struct etna_shader_variant * v)2283 etna_compile_shader(struct etna_shader_variant *v)
2284 {
2285    /* Create scratch space that may be too large to fit on stack
2286     */
2287    bool ret;
2288    struct etna_compile *c;
2289 
2290    if (unlikely(!v))
2291       return false;
2292 
2293    const struct etna_specs *specs = v->shader->specs;
2294 
2295    struct tgsi_lowering_config lconfig = {
2296       .lower_FLR = !specs->has_sign_floor_ceil,
2297       .lower_CEIL = !specs->has_sign_floor_ceil,
2298       .lower_POW = true,
2299       .lower_EXP = true,
2300       .lower_LOG = true,
2301       .lower_DP2 = !specs->has_halti2_instructions,
2302       .lower_TRUNC = true,
2303    };
2304 
2305    c = CALLOC_STRUCT(etna_compile);
2306    if (!c)
2307       return false;
2308 
2309    memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));
2310 
2311    const struct tgsi_token *tokens = v->shader->tokens;
2312 
2313    c->specs = specs;
2314    c->key = &v->key;
2315    c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
2316    c->free_tokens = !!c->tokens;
2317    if (!c->tokens) {
2318       /* no lowering */
2319       c->tokens = tokens;
2320    }
2321 
2322    /* Build a map from gallium register to native registers for files
2323     * CONST, SAMP, IMM, OUT, IN, TEMP.
2324     * SAMP will map as-is for fragment shaders, there will be a +8 offset for
2325     * vertex shaders.
2326     */
2327    /* Pass one -- check register file declarations and immediates */
2328    etna_compile_parse_declarations(c);
2329 
2330    etna_allocate_decls(c);
2331 
2332    /* Pass two -- check usage of temporaries, inputs, outputs */
2333    etna_compile_pass_check_usage(c);
2334 
2335    assign_special_inputs(c);
2336 
2337    /* Assign native temp register to TEMPs */
2338    assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);
2339 
2340    /* optimize outputs */
2341    etna_compile_pass_optimize_outputs(c);
2342 
2343    /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
2344     *     this is part of RGROUP_INTERNAL
2345     */
2346 
2347    /* assign inputs: last usage of input should be <= first usage of temp */
2348    /*   potential optimization case:
2349     *     if single MOV TEMP[y], IN[x] before which temp y is not used, and
2350     * after which IN[x]
2351     *     is not read, temp[y] can be used as input register as-is
2352     */
2353    /*   sort temporaries by first use
2354     *   sort inputs by last usage
2355     *   iterate over inputs, temporaries
2356     *     if last usage of input <= first usage of temp:
2357     *       assign input to temp
2358     *       advance input, temporary pointer
2359     *     else
2360     *       advance temporary pointer
2361     *
2362     *   potential problem: instruction with multiple inputs of which one is the
2363     * temp and the other is the input;
2364     *      however, as the temp is not used before this, how would this make
2365     * sense? uninitialized temporaries have an undefined
2366     *      value, so this would be ok
2367     */
2368    assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);
2369 
2370    /* assign outputs: first usage of output should be >= last usage of temp */
2371    /*   potential optimization case:
2372     *      if single MOV OUT[x], TEMP[y] (with full write mask, or at least
2373     * writing all components that are used in
2374     *        the shader) after which temp y is no longer used temp[y] can be
2375     * used as output register as-is
2376     *
2377     *   potential problem: instruction with multiple outputs of which one is the
2378     * temp and the other is the output;
2379     *      however, as the temp is not used after this, how would this make
2380     * sense? could just discard the output value
2381     */
2382    /*   sort temporaries by last use
2383     *   sort outputs by first usage
2384     *   iterate over outputs, temporaries
2385     *     if first usage of output >= last usage of temp:
2386     *       assign output to temp
2387     *       advance output, temporary pointer
2388     *     else
2389     *       advance temporary pointer
2390     */
2391    assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);
2392 
2393    assign_constants_and_immediates(c);
2394    assign_texture_units(c);
2395 
2396    /* list declarations */
2397    for (int x = 0; x < c->total_decls; ++x) {
2398       DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2399                                     "last_use=%i native=%i usage_mask=%x "
2400                                     "has_semantic=%i",
2401             x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2402             c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2403             c->decl[x].native.valid ? c->decl[x].native.id : -1,
2404             c->decl[x].usage_mask, c->decl[x].has_semantic);
2405       if (c->decl[x].has_semantic)
2406          DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2407                tgsi_semantic_names[c->decl[x].semantic.Name],
2408                c->decl[x].semantic.Index);
2409    }
2410    /* XXX for PS we need to permute so that inputs are always in temporary
2411     * 0..N-1.
2412     * There is no "switchboard" for varyings (AFAIK!). The output color,
2413     * however, can be routed
2414     * from an arbitrary temporary.
2415     */
2416    if (c->info.processor == PIPE_SHADER_FRAGMENT)
2417       permute_ps_inputs(c);
2418 
2419 
2420    /* list declarations */
2421    for (int x = 0; x < c->total_decls; ++x) {
2422       DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2423                                     "last_use=%i native=%i usage_mask=%x "
2424                                     "has_semantic=%i",
2425             x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2426             c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2427             c->decl[x].native.valid ? c->decl[x].native.id : -1,
2428             c->decl[x].usage_mask, c->decl[x].has_semantic);
2429       if (c->decl[x].has_semantic)
2430          DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2431                tgsi_semantic_names[c->decl[x].semantic.Name],
2432                c->decl[x].semantic.Index);
2433    }
2434 
2435    /* pass 3: generate instructions */
2436    etna_compile_pass_generate_code(c);
2437    etna_compile_add_z_div_if_needed(c);
2438    etna_compile_frag_rb_swap(c);
2439    etna_compile_add_nop_if_needed(c);
2440 
2441    ret = etna_compile_check_limits(c);
2442    if (!ret)
2443       goto out;
2444 
2445    etna_compile_fill_in_labels(c);
2446 
2447    /* fill in output structure */
2448    v->processor = c->info.processor;
2449    v->code_size = c->inst_ptr * 4;
2450    v->code = mem_dup(c->code, c->inst_ptr * 16);
2451    v->num_loops = c->num_loops;
2452    v->num_temps = c->next_free_native;
2453    v->vs_pos_out_reg = -1;
2454    v->vs_pointsize_out_reg = -1;
2455    v->ps_color_out_reg = -1;
2456    v->ps_depth_out_reg = -1;
2457    v->needs_icache = c->inst_ptr > c->specs->max_instructions;
2458    copy_uniform_state_to_shader(c, v);
2459 
2460    if (c->info.processor == PIPE_SHADER_VERTEX) {
2461       fill_in_vs_inputs(v, c);
2462       fill_in_vs_outputs(v, c);
2463    } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
2464       fill_in_ps_inputs(v, c);
2465       fill_in_ps_outputs(v, c);
2466    }
2467 
2468 out:
2469    if (c->free_tokens)
2470       FREE((void *)c->tokens);
2471 
2472    FREE(c->labels);
2473    FREE(c);
2474 
2475    return ret;
2476 }
2477 
2478 extern const char *tgsi_swizzle_names[];
2479 void
etna_dump_shader(const struct etna_shader_variant * shader)2480 etna_dump_shader(const struct etna_shader_variant *shader)
2481 {
2482    if (shader->processor == PIPE_SHADER_VERTEX)
2483       printf("VERT\n");
2484    else
2485       printf("FRAG\n");
2486 
2487 
2488    etna_disasm(shader->code, shader->code_size, PRINT_RAW);
2489 
2490    printf("num loops: %i\n", shader->num_loops);
2491    printf("num temps: %i\n", shader->num_temps);
2492    printf("num const: %i\n", shader->uniforms.const_count);
2493    printf("immediates:\n");
2494    for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
2495       printf(" [%i].%s = %f (0x%08x)\n",
2496              (idx + shader->uniforms.const_count) / 4,
2497              tgsi_swizzle_names[idx % 4],
2498              *((float *)&shader->uniforms.imm_data[idx]),
2499              shader->uniforms.imm_data[idx]);
2500    }
2501    printf("inputs:\n");
2502    for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
2503       printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
2504              tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
2505              shader->infile.reg[idx].semantic.Index,
2506              shader->infile.reg[idx].num_components);
2507    }
2508    printf("outputs:\n");
2509    for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
2510       printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
2511              tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
2512              shader->outfile.reg[idx].semantic.Index,
2513              shader->outfile.reg[idx].num_components);
2514    }
2515    printf("special:\n");
2516    if (shader->processor == PIPE_SHADER_VERTEX) {
2517       printf("  vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
2518       printf("  vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
2519       printf("  vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
2520    } else {
2521       printf("  ps_color_out_reg=%i\n", shader->ps_color_out_reg);
2522       printf("  ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
2523    }
2524    printf("  input_count_unk8=0x%08x\n", shader->input_count_unk8);
2525 }
2526 
2527 void
etna_destroy_shader(struct etna_shader_variant * shader)2528 etna_destroy_shader(struct etna_shader_variant *shader)
2529 {
2530    assert(shader);
2531 
2532    FREE(shader->code);
2533    FREE(shader->uniforms.imm_data);
2534    FREE(shader->uniforms.imm_contents);
2535    FREE(shader->output_per_semantic_list);
2536    FREE(shader);
2537 }
2538 
2539 static const struct etna_shader_inout *
etna_shader_vs_lookup(const struct etna_shader_variant * sobj,const struct etna_shader_inout * in)2540 etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
2541                       const struct etna_shader_inout *in)
2542 {
2543    if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
2544       return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];
2545 
2546    return NULL;
2547 }
2548 
2549 bool
etna_link_shader(struct etna_shader_link_info * info,const struct etna_shader_variant * vs,const struct etna_shader_variant * fs)2550 etna_link_shader(struct etna_shader_link_info *info,
2551                  const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
2552 {
2553    int comp_ofs = 0;
2554    /* For each fragment input we need to find the associated vertex shader
2555     * output, which can be found by matching on semantic name and index. A
2556     * binary search could be used because the vs outputs are sorted by their
2557     * semantic index and grouped by semantic type by fill_in_vs_outputs.
2558     */
2559    assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
2560    info->pcoord_varying_comp_ofs = -1;
2561 
2562    for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
2563       const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
2564       const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
2565       struct etna_varying *varying;
2566       bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR;
2567 
2568       assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
2569 
2570       if (fsio->reg > info->num_varyings)
2571          info->num_varyings = fsio->reg;
2572 
2573       varying = &info->varyings[fsio->reg - 1];
2574       varying->num_components = fsio->num_components;
2575 
2576       if (!interpolate_always) /* colors affected by flat shading */
2577          varying->pa_attributes = 0x200;
2578       else /* texture coord or other bypasses flat shading */
2579          varying->pa_attributes = 0x2f1;
2580 
2581       varying->use[0] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_X : VARYING_COMPONENT_USE_USED;
2582       varying->use[1] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_Y : VARYING_COMPONENT_USE_USED;
2583       varying->use[2] = VARYING_COMPONENT_USE_USED;
2584       varying->use[3] = VARYING_COMPONENT_USE_USED;
2585 
2586 
2587       /* point coord is an input to the PS without matching VS output,
2588        * so it gets a varying slot without being assigned a VS register.
2589        */
2590       if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
2591          info->pcoord_varying_comp_ofs = comp_ofs;
2592       } else {
2593          if (vsio == NULL) { /* not found -- link error */
2594             BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index);
2595             return true;
2596          }
2597 
2598          varying->reg = vsio->reg;
2599       }
2600 
2601       comp_ofs += varying->num_components;
2602    }
2603 
2604    assert(info->num_varyings == fs->infile.num_reg);
2605 
2606    return false;
2607 }
2608