1 /*
2  * Copyright (c) 2012-2015 Etnaviv Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Wladimir J. van der Laan <laanwj@gmail.com>
25  */
26 
27 /* TGSI->Vivante shader ISA conversion */
28 
29 /* What does the compiler return (see etna_shader_object)?
30  *  1) instruction data
31  *  2) input-to-temporary mapping (fixed for ps)
32  *      *) in case of ps, semantic -> varying id mapping
33  *      *) for each varying: number of components used (r, rg, rgb, rgba)
34  *  3) temporary-to-output mapping (in case of vs, fixed for ps)
35  *  4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36  *  5) immediates base offset, immediates data
37  *  6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38  *     configure the hw, but useful for error checking
39  *  7) enough information to add the z=(z+w)/2.0 necessary for older chips
40  *     (output reg id is enough)
41  *
42  *  Empty shaders are not allowed; we should always generate at least a NOP.
43  *  Also, if there is a label at the end of the shader, an extra NOP should be
44  *  generated as a jump target.
45  *
46  * TODO
47  * * Use an instruction scheduler
48  * * Indirect access to uniforms / temporaries using amode
49  */
50 
51 #include "etnaviv_compiler.h"
52 
53 #include "etnaviv_asm.h"
54 #include "etnaviv_context.h"
55 #include "etnaviv_debug.h"
56 #include "etnaviv_disasm.h"
57 #include "etnaviv_uniforms.h"
58 #include "etnaviv_util.h"
59 
60 #include "pipe/p_shader_tokens.h"
61 #include "tgsi/tgsi_info.h"
62 #include "tgsi/tgsi_iterate.h"
63 #include "tgsi/tgsi_lowering.h"
64 #include "tgsi/tgsi_strings.h"
65 #include "tgsi/tgsi_util.h"
66 #include "util/u_math.h"
67 #include "util/u_memory.h"
68 
69 #include <fcntl.h>
70 #include <stdio.h>
71 #include <sys/stat.h>
72 #include <sys/types.h>
73 
74 #define ETNA_MAX_INNER_TEMPS 2
75 
76 static const float sincos_const[2][4] = {
77    {
78       2., -1., 4., -4.,
79    },
80    {
81       1. / (2. * M_PI), 0.75, 0.5, 0.0,
82    },
83 };
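
/* Note: these two vec4 rows hold the constants referenced by the fast
 * sine/cosine fallback documented in trans_trig() below -- the 1/(2*pi)
 * pre-scale and 0.75/0.5 phase offsets, and the 2/-1/4/-4 polynomial
 * factors -- presumably loaded as vec4 immediates (cf. etna_imm_vec4f()). */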
84 
85 /* Native register description structure */
86 struct etna_native_reg {
87    unsigned valid : 1;
88    unsigned is_tex : 1; /* is texture unit, overrides rgroup */
89    unsigned rgroup : 3;
90    unsigned id : 9;
91 };
92 
93 /* Register description */
94 struct etna_reg_desc {
95    enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
96    int idx; /* index into file */
97    bool active; /* used in program */
98    int first_use; /* instruction id of first use (scope begin) */
99    int last_use; /* instruction id of last use (scope end, inclusive) */
100 
101    struct etna_native_reg native; /* native register to map to */
102    unsigned usage_mask : 4; /* usage, per channel */
103    bool has_semantic; /* register has associated TGSI semantic */
104    struct tgsi_declaration_semantic semantic; /* TGSI semantic */
105    struct tgsi_declaration_interp interp; /* Interpolation type */
106 };
107 
108 /* Label information structure */
109 struct etna_compile_label {
110    int inst_idx; /* Instruction id that label points to */
111 };
112 
113 enum etna_compile_frame_type {
114    ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
115    ETNA_COMPILE_FRAME_LOOP,
116 };
117 
118 /* nesting scope frame (LOOP, IF, ...) during compilation
119  */
120 struct etna_compile_frame {
121    enum etna_compile_frame_type type;
122    struct etna_compile_label *lbl_else;
123    struct etna_compile_label *lbl_endif;
124    struct etna_compile_label *lbl_loop_bgn;
125    struct etna_compile_label *lbl_loop_end;
126 };
127 
128 struct etna_compile_file {
129    /* Number of registers in each TGSI file (max register+1) */
130    size_t reg_size;
131    /* Register descriptions, per register index */
132    struct etna_reg_desc *reg;
133 };
134 
135 #define array_insert(arr, val)                          \
136    do {                                                 \
137       if (arr##_count == arr##_sz) {                    \
138          arr##_sz = MAX2(2 * arr##_sz, 16);             \
139          arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
140       }                                                 \
141       arr[arr##_count++] = val;                         \
142    } while (0)
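
/* Usage sketch: for a holder with fields `labels`, `labels_count` and
 * `labels_sz` (as in struct etna_compile below), `array_insert(c->labels, lbl)`
 * doubles the backing storage when full (starting at 16 entries) and appends
 * `lbl`; the macro relies on this `_count`/`_sz` naming convention. */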
143 
144 
145 /* scratch area for compiling shader, freed after compilation finishes */
146 struct etna_compile {
147    const struct tgsi_token *tokens;
148    bool free_tokens;
149 
150    struct tgsi_shader_info info;
151 
152    /* Register descriptions, per TGSI file, per register index */
153    struct etna_compile_file file[TGSI_FILE_COUNT];
154 
155    /* Keep track of TGSI register declarations */
156    struct etna_reg_desc decl[ETNA_MAX_DECL];
157    uint total_decls;
158 
159    /* Bitmap of dead instructions which are removed in a separate pass */
160    bool dead_inst[ETNA_MAX_TOKENS];
161 
162    /* Immediate data */
163    enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
164    uint32_t imm_data[ETNA_MAX_IMM];
165    uint32_t imm_base; /* base of immediates (in 32 bit units) */
166    uint32_t imm_size; /* size of immediates (in 32 bit units) */
167 
168    /* Next free native register, for register allocation */
169    uint32_t next_free_native;
170 
171    /* Temporary register for use within translated TGSI instruction,
172     * only allocated when needed.
173     */
174    int inner_temps; /* number of inner temps used within the current
175                        instruction; at most ETNA_MAX_INNER_TEMPS */
176    struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];
177 
178    /* Fields for handling nested conditionals */
179    struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
180    int frame_sp;
181    struct etna_compile_label *lbl_usage[ETNA_MAX_INSTRUCTIONS];
182 
183    unsigned labels_count, labels_sz;
184    struct etna_compile_label *labels;
185 
186    /* Code generation */
187    int inst_ptr; /* current instruction pointer */
188    uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];
189 
190    /* I/O */
191 
192    /* Number of varyings (PS only) */
193    int num_varyings;
194 
195    /* GPU hardware specs */
196    const struct etna_specs *specs;
197 };
198 
199 static struct etna_reg_desc *
200 etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
201 {
202    return &c->file[dst.File].reg[dst.Index];
203 }
204 
205 static struct etna_reg_desc *
206 etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
207 {
208    return &c->file[src.File].reg[src.Index];
209 }
210 
211 static struct etna_native_reg
212 etna_native_temp(unsigned reg)
213 {
214    return (struct etna_native_reg) {
215       .valid = 1,
216       .rgroup = INST_RGROUP_TEMP,
217       .id = reg
218    };
219 }
220 
221 /** Register allocation **/
222 enum reg_sort_order {
223    FIRST_USE_ASC,
224    FIRST_USE_DESC,
225    LAST_USE_ASC,
226    LAST_USE_DESC
227 };
228 
229 /* Augmented register description for sorting */
230 struct sort_rec {
231    struct etna_reg_desc *ptr;
232    int key;
233 };
234 
235 static int
236 sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
237 {
238    if (a->key < b->key)
239       return -1;
240 
241    if (a->key > b->key)
242       return 1;
243 
244    return 0;
245 }
246 
247 /* create an index on a register set based on certain criteria. */
248 static int
249 sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
250                enum reg_sort_order so)
251 {
252    struct etna_reg_desc *regs = file->reg;
253    int ptr = 0;
254 
255    /* pre-populate keys from active registers */
256    for (int idx = 0; idx < file->reg_size; ++idx) {
257       /* only interested in active registers now; will only assign inactive ones
258        * if no space in active ones */
259       if (regs[idx].active) {
260          sorted[ptr].ptr = &regs[idx];
261 
262          switch (so) {
263          case FIRST_USE_ASC:
264             sorted[ptr].key = regs[idx].first_use;
265             break;
266          case LAST_USE_ASC:
267             sorted[ptr].key = regs[idx].last_use;
268             break;
269          case FIRST_USE_DESC:
270             sorted[ptr].key = -regs[idx].first_use;
271             break;
272          case LAST_USE_DESC:
273             sorted[ptr].key = -regs[idx].last_use;
274             break;
275          }
276          ptr++;
277       }
278    }
279 
280    /* sort index by key */
281    qsort(sorted, ptr, sizeof(struct sort_rec),
282          (int (*)(const void *, const void *))sort_rec_compar);
283 
284    return ptr;
285 }
286 
287 /* Allocate a new, unused, native temp register */
288 static struct etna_native_reg
289 alloc_new_native_reg(struct etna_compile *c)
290 {
291    assert(c->next_free_native < ETNA_MAX_TEMPS);
292    return etna_native_temp(c->next_free_native++);
293 }
294 
295 /* assign TEMPs to native registers */
296 static void
297 assign_temporaries_to_native(struct etna_compile *c,
298                              struct etna_compile_file *file)
299 {
300    struct etna_reg_desc *temps = file->reg;
301 
302    for (int idx = 0; idx < file->reg_size; ++idx)
303       temps[idx].native = alloc_new_native_reg(c);
304 }
305 
306 /* assign inputs and outputs to temporaries
307  * Gallium assumes that the hardware has separate registers for taking input
308  * and output; Vivante GPUs, however, use temporaries both for passing in
309  * inputs and passing back outputs.
310  * Try to re-use temporary registers where possible. */
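/* For example, an INPUT whose last_use is instruction 3 may share the native
 * temp of a TEMPORARY whose first_use is instruction 5; the loop below walks
 * both sides in sorted order to find such non-overlapping pairs. */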
311 static void
312 assign_inouts_to_temporaries(struct etna_compile *c, uint file)
313 {
314    bool mode_inputs = (file == TGSI_FILE_INPUT);
315    int inout_ptr = 0, num_inouts;
316    int temp_ptr = 0, num_temps;
317    struct sort_rec inout_order[ETNA_MAX_TEMPS];
318    struct sort_rec temps_order[ETNA_MAX_TEMPS];
319    num_inouts = sort_registers(inout_order, &c->file[file],
320                                mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
321    num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
322                               mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);
323 
324    while (inout_ptr < num_inouts && temp_ptr < num_temps) {
325       struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
326       struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;
327 
328       if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
329          inout_ptr++;
330          continue;
331       }
332 
333       /* is the last usage of this input before, or in the same instruction
334        * as, the first use of the temporary? */
335       if (mode_inputs ? (inout->last_use <= temp->first_use)
336                       : (inout->first_use >= temp->last_use)) {
337          /* assign it and advance to next input */
338          inout->native = temp->native;
339          inout_ptr++;
340       }
341 
342       temp_ptr++;
343    }
344 
345    /* if we couldn't reuse current ones, allocate new temporaries */
346    for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
347       struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
348 
349       if (inout->active && !inout->native.valid)
350          inout->native = alloc_new_native_reg(c);
351    }
352 }
353 
354 /* Allocate an immediate with a certain value and return the index. If
355  * there is already an immediate with that value, return that.
356  */
357 static struct etna_inst_src
358 alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
359           uint32_t value)
360 {
361    int idx;
362 
363    /* Could use a hash table to speed this up */
364    for (idx = 0; idx < c->imm_size; ++idx) {
365       if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
366          break;
367    }
368 
369    /* look if there is an unused slot */
370    if (idx == c->imm_size) {
371       for (idx = 0; idx < c->imm_size; ++idx) {
372          if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
373             break;
374       }
375    }
376 
377    /* allocate new immediate */
378    if (idx == c->imm_size) {
379       assert(c->imm_size < ETNA_MAX_IMM);
380       idx = c->imm_size++;
381       c->imm_data[idx] = value;
382       c->imm_contents[idx] = contents;
383    }
384 
385    /* swizzle so that component with value is returned in all components */
386    idx += c->imm_base;
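   /* e.g. (assuming imm_base is 0) immediate slot 6 ends up in uniform
    * register 6/4 = 1, component 6%4 = 2 (z), broadcast as .zzzz */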
387    struct etna_inst_src imm_src = {
388       .use = 1,
389       .rgroup = INST_RGROUP_UNIFORM_0,
390       .reg = idx / 4,
391       .swiz = INST_SWIZ_BROADCAST(idx & 3)
392    };
393 
394    return imm_src;
395 }
396 
397 static struct etna_inst_src
398 alloc_imm_u32(struct etna_compile *c, uint32_t value)
399 {
400    return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
401 }
402 
403 static struct etna_inst_src
404 alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
405                 const uint32_t *values)
406 {
407    struct etna_inst_src imm_src = { };
408    int idx, i;
409 
410    for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
411       /* What if we can use a uniform with a different swizzle? */
412       for (i = 0; i < 4; i++)
413          if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
414             break;
415       if (i == 4)
416          break;
417    }
418 
419    if (idx + 3 >= c->imm_size) {
420       idx = align(c->imm_size, 4);
421       assert(idx + 4 <= ETNA_MAX_IMM);
422 
423       for (i = 0; i < 4; i++) {
424          c->imm_data[idx + i] = values[i];
425          c->imm_contents[idx + i] = contents;
426       }
427 
428       c->imm_size = idx + 4;
429    }
430 
431    assert((c->imm_base & 3) == 0);
432    idx += c->imm_base;
433    imm_src.use = 1;
434    imm_src.rgroup = INST_RGROUP_UNIFORM_0;
435    imm_src.reg = idx / 4;
436    imm_src.swiz = INST_SWIZ_IDENTITY;
437 
438    return imm_src;
439 }
440 
441 static uint32_t
442 get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
443             unsigned swiz_idx)
444 {
445    assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
446    unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
447 
448    return c->imm_data[idx];
449 }
450 
451 /* Allocate immediate with a certain float value. If there is already an
452  * immediate with that value, return that.
453  */
454 static struct etna_inst_src
455 alloc_imm_f32(struct etna_compile *c, float value)
456 {
457    return alloc_imm_u32(c, fui(value));
458 }
459 
460 static struct etna_inst_src
461 etna_imm_vec4f(struct etna_compile *c, const float *vec4)
462 {
463    uint32_t val[4];
464 
465    for (int i = 0; i < 4; i++)
466       val[i] = fui(vec4[i]);
467 
468    return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
469 }
470 
471 /* Pass -- check register file declarations and immediates */
472 static void
473 etna_compile_parse_declarations(struct etna_compile *c)
474 {
475    struct tgsi_parse_context ctx = { };
476    unsigned status = TGSI_PARSE_OK;
477    status = tgsi_parse_init(&ctx, c->tokens);
478    assert(status == TGSI_PARSE_OK);
479 
480    while (!tgsi_parse_end_of_tokens(&ctx)) {
481       tgsi_parse_token(&ctx);
482 
483       switch (ctx.FullToken.Token.Type) {
484       case TGSI_TOKEN_TYPE_IMMEDIATE: {
485          /* immediates are handled differently from other files; they are
486           * not declared explicitly, and always add four components */
487          const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
488          assert(c->imm_size <= (ETNA_MAX_IMM - 4));
489 
490          for (int i = 0; i < 4; ++i) {
491             unsigned idx = c->imm_size++;
492 
493             c->imm_data[idx] = imm->u[i].Uint;
494             c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
495          }
496       }
497       break;
498       }
499    }
500 
501    tgsi_parse_free(&ctx);
502 }
503 
504 /* Allocate register declarations for the registers in all register files */
505 static void
506 etna_allocate_decls(struct etna_compile *c)
507 {
508    uint idx = 0;
509 
510    for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
511       c->file[x].reg = &c->decl[idx];
512       c->file[x].reg_size = c->info.file_max[x] + 1;
513 
514       for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
515          c->decl[idx].file = x;
516          c->decl[idx].idx = sub;
517          idx++;
518       }
519    }
520 
521    c->total_decls = idx;
522 }
523 
524 /* Pass -- check and record usage of temporaries, inputs, outputs */
525 static void
526 etna_compile_pass_check_usage(struct etna_compile *c)
527 {
528    struct tgsi_parse_context ctx = { };
529    unsigned status = TGSI_PARSE_OK;
530    status = tgsi_parse_init(&ctx, c->tokens);
531    assert(status == TGSI_PARSE_OK);
532 
533    for (int idx = 0; idx < c->total_decls; ++idx) {
534       c->decl[idx].active = false;
535       c->decl[idx].first_use = c->decl[idx].last_use = -1;
536    }
537 
538    int inst_idx = 0;
539    while (!tgsi_parse_end_of_tokens(&ctx)) {
540       tgsi_parse_token(&ctx);
541       /* find out max register numbers used
542        * For every register, mark the first and last instruction index where
543        * it is used; this allows finding ranges where a temporary can be
544        * borrowed as input and/or output register.
545        *
546        * XXX in the case of loops this needs special care, or may even need to
547        * be disabled completely: the last usage of a register inside a loop does
548        * not mean it is free, as it can still be used on the next loop iteration
549        * (execution is no longer chronological). The register can only be
550        * declared "free" after the loop finishes.
551        *
552        * Same for inputs: the first usage of a register inside a loop doesn't
553        * mean that the register won't have been overwritten in a previous
554        * iteration. The register can only be declared free before the loop
555        * starts.
556        * The proper way would be to do full dominator / post-dominator analysis
557        * (especially with more complicated control flow such as direct branch
558        * instructions), but not for now...
559        */
563       switch (ctx.FullToken.Token.Type) {
564       case TGSI_TOKEN_TYPE_DECLARATION: {
565          /* Declaration: fill in file details */
566          const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
567          struct etna_compile_file *file = &c->file[decl->Declaration.File];
568 
569          for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
570             file->reg[idx].usage_mask = 0; // we'll compute this ourselves
571             file->reg[idx].has_semantic = decl->Declaration.Semantic;
572             file->reg[idx].semantic = decl->Semantic;
573             file->reg[idx].interp = decl->Interp;
574          }
575       } break;
576       case TGSI_TOKEN_TYPE_INSTRUCTION: {
577          /* Instruction: iterate over operands of instruction */
578          const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
579 
580          /* iterate over destination registers */
581          for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
582             struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];
583 
584             if (reg_desc->first_use == -1)
585                reg_desc->first_use = inst_idx;
586 
587             reg_desc->last_use = inst_idx;
588             reg_desc->active = true;
589          }
590 
591          /* iterate over source registers */
592          for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
593             struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];
594 
595             if (reg_desc->first_use == -1)
596                reg_desc->first_use = inst_idx;
597 
598             reg_desc->last_use = inst_idx;
599             reg_desc->active = true;
600             /* accumulate usage mask for register; this is used to determine
601              * how many slots for varyings should be allocated */
603             reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
604          }
605          inst_idx += 1;
606       } break;
607       default:
608          break;
609       }
610    }
611 
612    tgsi_parse_free(&ctx);
613 }
614 
615 /* assign inputs that need to be assigned to specific registers */
616 static void
617 assign_special_inputs(struct etna_compile *c)
618 {
619    if (c->info.processor == PIPE_SHADER_FRAGMENT) {
620       /* never assign t0 as it is the position output, start assigning at t1 */
621       c->next_free_native = 1;
622 
623       /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
624       for (int idx = 0; idx < c->total_decls; ++idx) {
625          struct etna_reg_desc *reg = &c->decl[idx];
626 
627          if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
628             reg->native = etna_native_temp(0);
629       }
630    }
631 }
632 
633 /* Check that a move instruction does not swizzle any of the components
634  * that it writes.
635  */
636 static bool
637 etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
638                           const struct tgsi_src_register src)
639 {
640    return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
641           (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
642           (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
643           (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
644 }
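
/* e.g. a "MOV OUT[0].xy, TEMP[0].xyzw" passes this check, while a
 * "MOV OUT[0].xy, TEMP[0].yxzw" does not, since the written x/y components
 * would come from swizzled source components. */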
645 
646 /* Pass -- optimize outputs
647  * Mesa tends to generate code like this at the end of its shaders
648  *   MOV OUT[1], TEMP[2]
649  *   MOV OUT[0], TEMP[0]
650  *   MOV OUT[2], TEMP[1]
651  * Recognize if
652  * a) there is only a single assignment to an output register and
653  * b) the temporary is not used after that
654  * Also recognize direct assignment of IN to OUT (passthrough)
655  **/
656 static void
657 etna_compile_pass_optimize_outputs(struct etna_compile *c)
658 {
659    struct tgsi_parse_context ctx = { };
660    int inst_idx = 0;
661    unsigned status = TGSI_PARSE_OK;
662    status = tgsi_parse_init(&ctx, c->tokens);
663    assert(status == TGSI_PARSE_OK);
664 
665    while (!tgsi_parse_end_of_tokens(&ctx)) {
666       tgsi_parse_token(&ctx);
667 
668       switch (ctx.FullToken.Token.Type) {
669       case TGSI_TOKEN_TYPE_INSTRUCTION: {
670          const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
671 
672          /* iterate over operands */
673          switch (inst->Instruction.Opcode) {
674          case TGSI_OPCODE_MOV: {
675             /* We are only interested in eliminating MOVs which write to
676              * the shader outputs. Test for this early. */
677             if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
678                break;
679             /* Elimination of a MOV must have no visible effect on the
680              * resulting shader: this means the MOV must not swizzle or
681              * saturate, and its source must not have the negate or
682              * absolute modifiers. */
683             if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
684                 inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
685                 inst->Src[0].Register.Absolute)
686                break;
687 
688             uint out_idx = inst->Dst[0].Register.Index;
689             uint in_idx = inst->Src[0].Register.Index;
690             /* assignment of temporary to output --
691              * and the output doesn't yet have a native register assigned
692              * and the last use of the temporary is this instruction
693              * and the MOV does not do a swizzle
694              */
695             if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
696                 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
697                 c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
698                c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
699                   c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
700                /* prevent temp from being re-used for the rest of the shader */
701                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
702                /* mark this MOV instruction as a no-op */
703                c->dead_inst[inst_idx] = true;
704             }
705             /* direct assignment of input to output --
706              * and the input or output doesn't yet have a native register
707              * assigned
708              * and the output is only used in this instruction,
709              * allocate a new register, and associate both input and output to
710              * it
711              * and the MOV does not do a swizzle
712              */
713             if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
714                 !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
715                 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
716                 c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
717                 c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
718                c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
719                   c->file[TGSI_FILE_INPUT].reg[in_idx].native =
720                      alloc_new_native_reg(c);
721                /* mark this MOV instruction as a no-op */
722                c->dead_inst[inst_idx] = true;
723             }
724          } break;
725          default:;
726          }
727          inst_idx += 1;
728       } break;
729       }
730    }
731 
732    tgsi_parse_free(&ctx);
733 }
734 
735 /* Get a temporary to be used within one TGSI instruction.
736  * The first time that this function is called the temporary will be allocated.
737  * Each call to this function will return the same temporary.
738  */
739 static struct etna_native_reg
740 etna_compile_get_inner_temp(struct etna_compile *c)
741 {
742    int inner_temp = c->inner_temps;
743 
744    if (inner_temp < ETNA_MAX_INNER_TEMPS) {
745       if (!c->inner_temp[inner_temp].valid)
746          c->inner_temp[inner_temp] = alloc_new_native_reg(c);
747 
748       /* alloc_new_native_reg() handles lack of registers */
749       c->inner_temps += 1;
750    } else {
751       BUG("Too many inner temporaries (%i) requested in one instruction",
752           inner_temp + 1);
753    }
754 
755    return c->inner_temp[inner_temp];
756 }
757 
758 static struct etna_inst_dst
759 etna_native_to_dst(struct etna_native_reg native, unsigned comps)
760 {
761    /* Can only assign to temporaries */
762    assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
763 
764    struct etna_inst_dst rv = {
765       .comps = comps,
766       .use = 1,
767       .reg = native.id,
768    };
769 
770    return rv;
771 }
772 
773 static struct etna_inst_src
774 etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
775 {
776    assert(native.valid && !native.is_tex);
777 
778    struct etna_inst_src rv = {
779       .use = 1,
780       .swiz = swizzle,
781       .rgroup = native.rgroup,
782       .reg = native.id,
783       .amode = INST_AMODE_DIRECT,
784    };
785 
786    return rv;
787 }
788 
789 static inline struct etna_inst_src
790 negate(struct etna_inst_src src)
791 {
792    src.neg = !src.neg;
793 
794    return src;
795 }
796 
797 static inline struct etna_inst_src
798 absolute(struct etna_inst_src src)
799 {
800    src.abs = 1;
801 
802    return src;
803 }
804 
805 static inline struct etna_inst_src
806 swizzle(struct etna_inst_src src, unsigned swizzle)
807 {
808    src.swiz = inst_swiz_compose(src.swiz, swizzle);
809 
810    return src;
811 }
812 
813 /* Emit instruction and append it to program */
814 static void
815 emit_inst(struct etna_compile *c, struct etna_inst *inst)
816 {
817    assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
818 
819    /* Check for uniform conflicts (each instruction can only access one
820     * uniform); if a conflict is detected, use an intermediate
821     * temporary */
822    unsigned uni_rgroup = -1;
823    unsigned uni_reg = -1;
824 
825    for (int src = 0; src < ETNA_NUM_SRC; ++src) {
826       if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
827          if (uni_reg == -1) { /* first unique uniform used */
828             uni_rgroup = inst->src[src].rgroup;
829             uni_reg = inst->src[src].reg;
830          } else { /* second or later; check that it is a re-use */
831             if (uni_rgroup != inst->src[src].rgroup ||
832                 uni_reg != inst->src[src].reg) {
833                DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
834                                              "accesses different uniforms, "
835                                              "need to generate extra MOV");
836                struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
837 
838                /* Generate move instruction to temporary */
839                etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
840                   .opcode = INST_OPCODE_MOV,
841                   .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
842                                                         INST_COMPS_Z | INST_COMPS_W),
843                   .src[2] = inst->src[src]
844                });
845 
846                c->inst_ptr++;
847 
848                /* Modify instruction to use temp register instead of uniform */
849                inst->src[src].use = 1;
850                inst->src[src].rgroup = INST_RGROUP_TEMP;
851                inst->src[src].reg = inner_temp.id;
852                inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
853                inst->src[src].neg = 0; /* negation happens on MOV */
854                inst->src[src].abs = 0; /* abs happens on MOV */
855                inst->src[src].amode = 0; /* amode effects happen on MOV */
856             }
857          }
858       }
859    }
860 
861    /* Finally assemble the actual instruction */
862    etna_assemble(&c->code[c->inst_ptr * 4], inst);
863    c->inst_ptr++;
864 }
865 
866 static unsigned int
867 etna_amode(struct tgsi_ind_register indirect)
868 {
869    assert(indirect.File == TGSI_FILE_ADDRESS);
870    assert(indirect.Index == 0);
871 
872    switch (indirect.Swizzle) {
873    case TGSI_SWIZZLE_X:
874       return INST_AMODE_ADD_A_X;
875    case TGSI_SWIZZLE_Y:
876       return INST_AMODE_ADD_A_Y;
877    case TGSI_SWIZZLE_Z:
878       return INST_AMODE_ADD_A_Z;
879    case TGSI_SWIZZLE_W:
880       return INST_AMODE_ADD_A_W;
881    default:
882       assert(!"Invalid swizzle");
883    }
884 }
885 
886 /* convert destination operand */
887 static struct etna_inst_dst
888 convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
889 {
890    struct etna_inst_dst rv = {
891       /// XXX .amode
892       .comps = in->Register.WriteMask,
893    };
894 
895    if (in->Register.File == TGSI_FILE_ADDRESS) {
896       assert(in->Register.Index == 0);
897       rv.reg = in->Register.Index;
898       rv.use = 0;
899    } else {
900       rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
901                               in->Register.WriteMask);
902    }
903 
904    if (in->Register.Indirect)
905       rv.amode = etna_amode(in->Indirect);
906 
907    return rv;
908 }
909 
910 /* convert texture operand */
911 static struct etna_inst_tex
912 convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
913             const struct tgsi_instruction_texture *tex)
914 {
915    struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
916    struct etna_inst_tex rv = {
917       // XXX .amode (to allow for an array of samplers?)
918       .swiz = INST_SWIZ_IDENTITY
919    };
920 
921    assert(native_reg.is_tex && native_reg.valid);
922    rv.id = native_reg.id;
923 
924    return rv;
925 }
926 
927 /* convert source operand */
928 static struct etna_inst_src
929 etna_create_src(const struct tgsi_full_src_register *tgsi,
930                 const struct etna_native_reg *native)
931 {
932    const struct tgsi_src_register *reg = &tgsi->Register;
933    struct etna_inst_src rv = {
934       .use = 1,
935       .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
936       .neg = reg->Negate,
937       .abs = reg->Absolute,
938       .rgroup = native->rgroup,
939       .reg = native->id,
940       .amode = INST_AMODE_DIRECT,
941    };
942 
943    assert(native->valid && !native->is_tex);
944 
945    if (reg->Indirect)
946       rv.amode = etna_amode(tgsi->Indirect);
947 
948    return rv;
949 }
950 
951 static struct etna_inst_src
952 etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
953                      struct etna_native_reg temp)
954 {
955    struct etna_inst mov = { };
956 
957    mov.opcode = INST_OPCODE_MOV;
958    mov.sat = 0;
959    mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
960                                       INST_COMPS_Z | INST_COMPS_W);
961    mov.src[2] = src;
962    emit_inst(c, &mov);
963 
964    src.swiz = INST_SWIZ_IDENTITY;
965    src.neg = src.abs = 0;
966    src.rgroup = temp.rgroup;
967    src.reg = temp.id;
968 
969    return src;
970 }
971 
972 static struct etna_inst_src
973 etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
974 {
975    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
976 
977    return etna_mov_src_to_temp(c, src, temp);
978 }
979 
980 static bool
981 etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
982 {
983    return etna_rgroup_is_uniform(a.rgroup) &&
984           etna_rgroup_is_uniform(b.rgroup) &&
985           (a.rgroup != b.rgroup || a.reg != b.reg);
986 }
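
/* e.g. one source reading uniform u0 and another reading u1 conflict, since an
 * instruction can access only one uniform register (see emit_inst()); two
 * reads of different components of u0 do not. */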
987 
988 /* create a new label */
989 static struct etna_compile_label *
990 alloc_new_label(struct etna_compile *c)
991 {
992    struct etna_compile_label label = {
993       .inst_idx = -1, /* start by pointing to no specific instruction */
994    };
995 
996    array_insert(c->labels, label);
997 
998    return &c->labels[c->labels_count - 1];
999 }
1000 
1001 /* place label at current instruction pointer */
1002 static void
1003 label_place(struct etna_compile *c, struct etna_compile_label *label)
1004 {
1005    label->inst_idx = c->inst_ptr;
1006 }
1007 
1008 /* mark label use at current instruction.
1009  * The target of the label will be filled in in the marked instruction's
1010  * src2.imm slot as soon as the value becomes known.
1011  */
1013 static void
1014 label_mark_use(struct etna_compile *c, struct etna_compile_label *label)
1015 {
1016    assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
1017    c->lbl_usage[c->inst_ptr] = label;
1018 }
1019 
1020 /* walk the frame stack and return first frame with matching type */
1021 static struct etna_compile_frame *
1022 find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1023 {
1024    for (int sp = c->frame_sp; sp >= 0; sp--)
1025       if (c->frame_stack[sp].type == type)
1026          return &c->frame_stack[sp];
1027 
1028    assert(0);
1029    return NULL;
1030 }
1031 
1032 struct instr_translater {
1033    void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
1034                const struct tgsi_full_instruction *inst,
1035                struct etna_inst_src *src);
1036    unsigned tgsi_opc;
1037    uint8_t opc;
1038 
1039    /* tgsi src -> etna src swizzle */
1040    int src[3];
1041 
1042    unsigned cond;
1043 };
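
/* Sketch of how the src[] mapping is consumed by trans_instr() below: a
 * hypothetical entry { 2, -1, -1 } would route the only TGSI operand into
 * hardware source slot 2 (the slot MOV-like ops read from in this file),
 * while -1 marks TGSI operands that must not appear and is asserted against. */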
1044 
1045 static void
1046 trans_instr(const struct instr_translater *t, struct etna_compile *c,
1047             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1048 {
1049    const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
1050    struct etna_inst instr = { };
1051 
1052    instr.opcode = t->opc;
1053    instr.cond = t->cond;
1054    instr.sat = inst->Instruction.Saturate;
1055 
1056    assert(info->num_dst <= 1);
1057    if (info->num_dst)
1058       instr.dst = convert_dst(c, &inst->Dst[0]);
1059 
1060    assert(info->num_src <= ETNA_NUM_SRC);
1061 
1062    for (unsigned i = 0; i < info->num_src; i++) {
1063       int swizzle = t->src[i];
1064 
1065       assert(swizzle != -1);
1066       instr.src[swizzle] = src[i];
1067    }
1068 
1069    emit_inst(c, &instr);
1070 }
1071 
1072 static void
1073 trans_min_max(const struct instr_translater *t, struct etna_compile *c,
1074               const struct tgsi_full_instruction *inst,
1075               struct etna_inst_src *src)
1076 {
1077    emit_inst(c, &(struct etna_inst) {
1078       .opcode = INST_OPCODE_SELECT,
1079        .cond = t->cond,
1080        .sat = inst->Instruction.Saturate,
1081        .dst = convert_dst(c, &inst->Dst[0]),
1082        .src[0] = src[0],
1083        .src[1] = src[1],
1084        .src[2] = src[0],
1085     });
1086 }
1087 
1088 static void
1089 trans_if(const struct instr_translater *t, struct etna_compile *c,
1090          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1091 {
1092    struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1093    struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);
1094 
1095    /* push IF to stack */
1096    f->type = ETNA_COMPILE_FRAME_IF;
1097    /* create "else" label */
1098    f->lbl_else = alloc_new_label(c);
1099    f->lbl_endif = NULL;
1100 
1101    /* We need to avoid the emit_inst() below becoming two instructions */
1102    if (etna_src_uniforms_conflict(src[0], imm_0))
1103       src[0] = etna_mov_src(c, src[0]);
1104 
1105    /* mark position in instruction stream of label reference so that it can be
1106     * filled in in next pass */
1107    label_mark_use(c, f->lbl_else);
1108 
1109    /* create conditional branch to label if src0 EQ 0 */
1110    emit_inst(c, &(struct etna_inst){
1111       .opcode = INST_OPCODE_BRANCH,
1112       .cond = INST_CONDITION_EQ,
1113       .src[0] = src[0],
1114       .src[1] = imm_0,
1115     /* imm is filled in later */
1116    });
1117 }
1118 
1119 static void
1120 trans_else(const struct instr_translater *t, struct etna_compile *c,
1121            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1122 {
1123    assert(c->frame_sp > 0);
1124    struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
1125    assert(f->type == ETNA_COMPILE_FRAME_IF);
1126 
1127    /* create "endif" label, and branch to endif label */
1128    f->lbl_endif = alloc_new_label(c);
1129    label_mark_use(c, f->lbl_endif);
1130    emit_inst(c, &(struct etna_inst) {
1131       .opcode = INST_OPCODE_BRANCH,
1132       .cond = INST_CONDITION_TRUE,
1133       /* imm is filled in later */
1134    });
1135 
1136    /* mark "else" label at this position in instruction stream */
1137    label_place(c, f->lbl_else);
1138 }
1139 
1140 static void
1141 trans_endif(const struct instr_translater *t, struct etna_compile *c,
1142             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1143 {
1144    assert(c->frame_sp > 0);
1145    struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1146    assert(f->type == ETNA_COMPILE_FRAME_IF);
1147 
1148    /* assign "endif" or "else" (if no ELSE) label to current position in
1149     * instruction stream, pop IF */
1150    if (f->lbl_endif != NULL)
1151       label_place(c, f->lbl_endif);
1152    else
1153       label_place(c, f->lbl_else);
1154 }
1155 
1156 static void
1157 trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1158                const struct tgsi_full_instruction *inst,
1159                struct etna_inst_src *src)
1160 {
1161    struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1162 
1163    /* push LOOP to stack */
1164    f->type = ETNA_COMPILE_FRAME_LOOP;
1165    f->lbl_loop_bgn = alloc_new_label(c);
1166    f->lbl_loop_end = alloc_new_label(c);
1167 
1168    label_place(c, f->lbl_loop_bgn);
1169 }
1170 
1171 static void
1172 trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
1173                const struct tgsi_full_instruction *inst,
1174                struct etna_inst_src *src)
1175 {
1176    assert(c->frame_sp > 0);
1177    struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1178    assert(f->type == ETNA_COMPILE_FRAME_LOOP);
1179 
1180    /* mark position in instruction stream of label reference so that it can be
1181     * filled in in next pass */
1182    label_mark_use(c, f->lbl_loop_bgn);
1183 
1184    /* create branch to loop_bgn label */
1185    emit_inst(c, &(struct etna_inst) {
1186       .opcode = INST_OPCODE_BRANCH,
1187       .cond = INST_CONDITION_TRUE,
1188       .src[0] = src[0],
1189       /* imm is filled in later */
1190    });
1191 
1192    label_place(c, f->lbl_loop_end);
1193 }
1194 
1195 static void
1196 trans_brk(const struct instr_translater *t, struct etna_compile *c,
1197           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1198 {
1199    assert(c->frame_sp > 0);
1200    struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1201 
1202    /* mark position in instruction stream of label reference so that it can be
1203     * filled in in next pass */
1204    label_mark_use(c, f->lbl_loop_end);
1205 
1206    /* create branch to loop_end label */
1207    emit_inst(c, &(struct etna_inst) {
1208       .opcode = INST_OPCODE_BRANCH,
1209       .cond = INST_CONDITION_TRUE,
1210       .src[0] = src[0],
1211       /* imm is filled in later */
1212    });
1213 }
1214 
1215 static void
1216 trans_cont(const struct instr_translater *t, struct etna_compile *c,
1217            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1218 {
1219    assert(c->frame_sp > 0);
1220    struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1221 
1222    /* mark position in instruction stream of label reference so that it can be
1223     * filled in in next pass */
1224    label_mark_use(c, f->lbl_loop_bgn);
1225 
1226    /* create branch to loop_bgn label */
1227    emit_inst(c, &(struct etna_inst) {
1228       .opcode = INST_OPCODE_BRANCH,
1229       .cond = INST_CONDITION_TRUE,
1230       .src[0] = src[0],
1231       /* imm is filled in later */
1232    });
1233 }
1234 
1235 static void
1236 trans_deriv(const struct instr_translater *t, struct etna_compile *c,
1237             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1238 {
1239    emit_inst(c, &(struct etna_inst) {
1240       .opcode = t->opc,
1241       .sat = inst->Instruction.Saturate,
1242       .dst = convert_dst(c, &inst->Dst[0]),
1243       .src[0] = src[0],
1244       .src[2] = src[0],
1245    });
1246 }
1247 
1248 static void
1249 trans_arl(const struct instr_translater *t, struct etna_compile *c,
1250           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1251 {
1252    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1253    struct etna_inst arl = { };
1254    struct etna_inst_dst dst;
1255 
1256    dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
1257                                   INST_COMPS_W);
1258 
1259    if (c->specs->has_sign_floor_ceil) {
1260       struct etna_inst floor = { };
1261 
1262       floor.opcode = INST_OPCODE_FLOOR;
1263       floor.src[2] = src[0];
1264       floor.dst = dst;
1265 
1266       emit_inst(c, &floor);
1267    } else {
1268       struct etna_inst floor[2] = { };
1269 
1270       floor[0].opcode = INST_OPCODE_FRC;
1271       floor[0].sat = inst->Instruction.Saturate;
1272       floor[0].dst = dst;
1273       floor[0].src[2] = src[0];
1274 
1275       floor[1].opcode = INST_OPCODE_ADD;
1276       floor[1].sat = inst->Instruction.Saturate;
1277       floor[1].dst = dst;
1278       floor[1].src[0] = src[0];
1279       floor[1].src[2].use = 1;
1280       floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
1281       floor[1].src[2].neg = 1;
1282       floor[1].src[2].rgroup = temp.rgroup;
1283       floor[1].src[2].reg = temp.id;
1284 
1285       emit_inst(c, &floor[0]);
1286       emit_inst(c, &floor[1]);
1287    }
1288 
1289    arl.opcode = INST_OPCODE_MOVAR;
1290    arl.sat = inst->Instruction.Saturate;
1291    arl.dst = convert_dst(c, &inst->Dst[0]);
1292    arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1293 
1294    emit_inst(c, &arl);
1295 }
1296 
1297 static void
1298 trans_lrp(const struct instr_translater *t, struct etna_compile *c,
1299           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1300 {
1301    /* dst = src0 * src1 + (1 - src0) * src2
1302     *     => src0 * src1 - (src0 - 1) * src2
1303     *     => src0 * src1 - (src0 * src2 - src2)
1304     * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1305     * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1306     */
1307    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1308    if (etna_src_uniforms_conflict(src[0], src[1]) ||
1309        etna_src_uniforms_conflict(src[0], src[2])) {
1310       src[0] = etna_mov_src(c, src[0]);
1311    }
1312 
1313    struct etna_inst mad[2] = { };
1314    mad[0].opcode = INST_OPCODE_MAD;
1315    mad[0].sat = 0;
1316    mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1317                                          INST_COMPS_Z | INST_COMPS_W);
1318    mad[0].src[0] = src[0];
1319    mad[0].src[1] = src[2];
1320    mad[0].src[2] = negate(src[2]);
1321    mad[1].opcode = INST_OPCODE_MAD;
1322    mad[1].sat = inst->Instruction.Saturate;
1323    mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1324    mad[1].src[1] = src[1];
1325    mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1326 
1327    emit_inst(c, &mad[0]);
1328    emit_inst(c, &mad[1]);
1329 }
1330 
1331 static void
1332 trans_lit(const struct instr_translater *t, struct etna_compile *c,
1333           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1334 {
1335    /* SELECT.LT tmp._y__, 0, src.yyyy, 0
1336     *  - can be eliminated if src.y is a uniform and >= 0
1337     * SELECT.GT tmp.___w, 128, src.wwww, 128
1338     * SELECT.LT tmp.___w, -128, tmp.wwww, -128
1339     *  - can be eliminated if src.w is a uniform and fits clamp
1340     * LOG tmp.x, void, void, tmp.yyyy
1341     * MUL tmp.x, tmp.xxxx, tmp.wwww, void
1342     * LITP dst, undef, src.xxxx, tmp.xxxx
1343     */
1344    struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
1345    struct etna_inst_src src_y = { };
1346 
1347    if (!etna_rgroup_is_uniform(src[0].rgroup)) {
1348       src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));
1349 
1350       struct etna_inst ins = { };
1351       ins.opcode = INST_OPCODE_SELECT;
1352       ins.cond = INST_CONDITION_LT;
1353       ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
1354       ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
1355       ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
1356       emit_inst(c, &ins);
1357    } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
1358       src_y = alloc_imm_f32(c, 0.0);
1359    else
1360       src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
1361 
1362    struct etna_inst_src src_w = { };
1363 
1364    if (!etna_rgroup_is_uniform(src[0].rgroup)) {
1365       src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));
1366 
1367       struct etna_inst ins = { };
1368       ins.opcode = INST_OPCODE_SELECT;
1369       ins.cond = INST_CONDITION_GT;
1370       ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
1371       ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
1372       ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
1373       emit_inst(c, &ins);
1374       ins.cond = INST_CONDITION_LT;
1375       ins.src[0].neg = !ins.src[0].neg;
1376       ins.src[2].neg = !ins.src[2].neg;
1377       ins.src[1] = src_w;
1378       emit_inst(c, &ins);
1379    } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
1380       src_w = alloc_imm_f32(c, -128.);
1381    else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
1382       src_w = alloc_imm_f32(c, 128.);
1383    else
1384       src_w = swizzle(src[0], SWIZZLE(W, W, W, W));
1385 
1386    struct etna_inst ins[3] = { };
1387    ins[0].opcode = INST_OPCODE_LOG;
1388    ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
1389    ins[0].src[2] = src_y;
1390 
1391    emit_inst(c, &ins[0]);
1392    emit_inst(c, &(struct etna_inst) {
1393       .opcode = INST_OPCODE_MUL,
1394       .sat = 0,
1395       .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
1396       .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
1397       .src[1] = src_w,
1398    });
1399    emit_inst(c, &(struct etna_inst) {
1400       .opcode = INST_OPCODE_LITP,
1401       .sat = 0,
1402       .dst = convert_dst(c, &inst->Dst[0]),
1403       .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
1404       .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
1405       .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
1406    });
1407 }
1408 
1409 static void
1410 trans_ssg(const struct instr_translater *t, struct etna_compile *c,
1411           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1412 {
1413    if (c->specs->has_sign_floor_ceil) {
1414       emit_inst(c, &(struct etna_inst){
1415          .opcode = INST_OPCODE_SIGN,
1416          .sat = inst->Instruction.Saturate,
1417          .dst = convert_dst(c, &inst->Dst[0]),
1418          .src[2] = src[0],
1419       });
1420    } else {
1421       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1422       struct etna_inst ins[2] = { };
1423 
1424       ins[0].opcode = INST_OPCODE_SET;
1425       ins[0].cond = INST_CONDITION_NZ;
1426       ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1427                                             INST_COMPS_Z | INST_COMPS_W);
1428       ins[0].src[0] = src[0];
1429 
1430       ins[1].opcode = INST_OPCODE_SELECT;
1431       ins[1].cond = INST_CONDITION_LZ;
1432       ins[1].sat = inst->Instruction.Saturate;
1433       ins[1].dst = convert_dst(c, &inst->Dst[0]);
1434       ins[1].src[0] = src[0];
1435       ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1436       ins[1].src[1] = negate(ins[1].src[2]);
1437 
1438       emit_inst(c, &ins[0]);
1439       emit_inst(c, &ins[1]);
1440    }
1441 }
1442 
1443 static void
1444 trans_trig(const struct instr_translater *t, struct etna_compile *c,
1445            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1446 {
1447    if (c->specs->has_new_sin_cos) { /* Alternative SIN/COS */
1448       /* On newer chips alternative SIN/COS instructions are implemented,
1449        * which:
1450        * - Need their input scaled by 1/pi instead of 2/pi
1451        * - Output an x and y component, which need to be multiplied to
1452        *   get the result
1453        */
1454       /* TGSI lowering should deal with SCS */
1455       assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
1456 
1457       struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
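      /* Sketch of the sequence emitted below (t = inner temp):
       *   MUL t.z, src, 1/pi
       *   SIN/COS t.xy, t.zzzz      (with the unknown tex.amode bit set)
       *   MUL dst, t.xxxx, t.yyyy
       */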
1458       emit_inst(c, &(struct etna_inst) {
1459          .opcode = INST_OPCODE_MUL,
1460          .sat = 0,
1461          .dst = etna_native_to_dst(temp, INST_COMPS_Z),
1462          .src[0] = src[0], /* any swizzling happens here */
1463          .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
1464       });
1465       emit_inst(c, &(struct etna_inst) {
1466          .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
1467                     ? INST_OPCODE_COS
1468                     : INST_OPCODE_SIN,
1469          .sat = 0,
1470          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1471          .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
1472          .tex = { .amode=1 }, /* Unknown bit needs to be set */
1473       });
1474       emit_inst(c, &(struct etna_inst) {
1475          .opcode = INST_OPCODE_MUL,
1476          .sat = inst->Instruction.Saturate,
1477          .dst = convert_dst(c, &inst->Dst[0]),
1478          .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1479          .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1480       });
1481 
1482    } else if (c->specs->has_sin_cos_sqrt) {
1483       /* TGSI lowering should deal with SCS */
1484       assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
1485 
1486       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1487       /* add divide by PI/2, using a temp register. GC2000
1488        * fails with src==dst for the trig instruction. */
1489       emit_inst(c, &(struct etna_inst) {
1490          .opcode = INST_OPCODE_MUL,
1491          .sat = 0,
1492          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1493                                          INST_COMPS_Z | INST_COMPS_W),
1494          .src[0] = src[0], /* any swizzling happens here */
1495          .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
1496       });
1497       emit_inst(c, &(struct etna_inst) {
1498          .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
1499                     ? INST_OPCODE_COS
1500                     : INST_OPCODE_SIN,
1501          .sat = inst->Instruction.Saturate,
1502          .dst = convert_dst(c, &inst->Dst[0]),
1503          .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
1504       });
1505    } else {
1506       /* Implement Nick's fast sine/cosine. Taken from:
1507        * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
1508        * A=(1/(2*PI) 0 1/(2*PI) 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
1509        *  MAD t.x_zw, src.xxxx, A, B
1510        *  FRC t.x_z_, void, void, t.xwzw
1511        *  MAD t.x_z_, t.xwzw, 2, -1
1512        *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
1513        *  DP3 t.x_z_, t.zyww, C, void         (for sin)
1514        *  DP3 t.__z_, t.zyww, C, void         (for scs)
1515        *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
1516        *  DP3 t.x_z_, t.xyww, C, void         (for cos)
1517        *  DP3 t.x___, t.xyww, C, void         (for scs)
1518        *  MAD t._y_w, t.xxzz, |t.xxzz|, -t.xxzz
1519        *  MAD dst, t.ywyw, .2225, t.xzxz
1520        *
1521        * TODO: we don't set dst.zw correctly for SCS.
1522        */
1523       struct etna_inst *p, ins[9] = { };
1524       struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
1525       struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
1526       struct etna_inst_src sincos[3], in = src[0];
1527       sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
1528       sincos[1] = etna_imm_vec4f(c, sincos_const[1]);
1529 
1530       /* A uniform source will cause the inner temp limit to
1531        * be exceeded.  Explicitly deal with that scenario.
1532        */
1533       if (etna_rgroup_is_uniform(src[0].rgroup)) {
1534          struct etna_inst ins = { };
1535          ins.opcode = INST_OPCODE_MOV;
1536          ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
1537          ins.src[2] = in;
1538          emit_inst(c, &ins);
1539          in = t0s;
1540       }
1541 
1542       ins[0].opcode = INST_OPCODE_MAD;
1543       ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
1544       ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
1545       ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
1546       ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */
1547 
1548       ins[1].opcode = INST_OPCODE_FRC;
1549       ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1550       ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));
1551 
1552       ins[2].opcode = INST_OPCODE_MAD;
1553       ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1554       ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
1555       ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
1556       ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */
1557 
1558       unsigned mul_swiz, dp3_swiz;
1559       if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
1560          mul_swiz = SWIZZLE(W, Z, W, W);
1561          dp3_swiz = SWIZZLE(Z, Y, W, W);
1562       } else {
1563          mul_swiz = SWIZZLE(W, X, W, W);
1564          dp3_swiz = SWIZZLE(X, Y, W, W);
1565       }
1566 
1567       ins[3].opcode = INST_OPCODE_MUL;
1568       ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
1569       ins[3].src[0] = swizzle(t0s, mul_swiz);
1570       ins[3].src[1] = absolute(ins[3].src[0]);
1571 
1572       ins[4].opcode = INST_OPCODE_DP3;
1573       ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
1574       ins[4].src[0] = swizzle(t0s, dp3_swiz);
1575       ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
1576 
1577       if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) {
1578          ins[5] = ins[3];
1579          ins[6] = ins[4];
1580          ins[4].dst.comps = INST_COMPS_X;
1581          ins[6].dst.comps = INST_COMPS_Z;
1582          ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W));
1583          ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W));
1584          ins[5].src[1] = absolute(ins[5].src[0]);
1585          p = &ins[7];
1586       } else {
1587          p = &ins[5];
1588       }
1589 
1590       p->opcode = INST_OPCODE_MAD;
1591       p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
1592       p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
1593       p->src[1] = absolute(p->src[0]);
1594       p->src[2] = negate(p->src[0]);
1595 
1596       p++;
1597       p->opcode = INST_OPCODE_MAD;
1598       p->sat = inst->Instruction.Saturate;
1599       p->dst = convert_dst(c, &inst->Dst[0]),
1600       p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
1601       p->src[1] = alloc_imm_f32(c, 0.2225);
1602       p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));
1603 
1604       for (int i = 0; &ins[i] <= p; i++)
1605          emit_inst(c, &ins[i]);
1606    }
1607 }
1608 
1609 static void
1610 trans_dph(const struct instr_translater *t, struct etna_compile *c,
1611           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1612 {
1613    /*
1614    DP3 tmp.xyzw, src0.xyzw, src1.xyzw, void
1615    ADD dst.xyzw, tmp.xyzw, void, src1.wwww
1616    */
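   /* i.e. DPH(a, b) = dot(a.xyz, b.xyz) + b.w, built as a DP3 followed by an
    * ADD of b.wwww (the DP3 result is presumably replicated across the written
    * components of the temporary).
    */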
1617    struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1618    struct etna_inst ins[2] = { };
1619 
1620    ins[0].opcode = INST_OPCODE_DP3;
1621    ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1622                                          INST_COMPS_Z | INST_COMPS_W);
1623    ins[0].src[0] = src[0];
1624    ins[0].src[1] = src[1];
1625 
1626    ins[1].opcode = INST_OPCODE_ADD;
1627    ins[1].sat = inst->Instruction.Saturate;
1628    ins[1].dst = convert_dst(c, &inst->Dst[0]);
1629    ins[1].src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1630    ins[1].src[2] = swizzle(src[1], SWIZZLE(W, W, W, W));
1631 
1632    emit_inst(c, &ins[0]);
1633    emit_inst(c, &ins[1]);
1634 }
1635 
1636 static void
1637 trans_sampler(const struct instr_translater *t, struct etna_compile *c,
1638               const struct tgsi_full_instruction *inst,
1639               struct etna_inst_src *src)
1640 {
1641    /* There is no native support for GL texture rectangle coordinates, so
1642     * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
1643    if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
1644       uint32_t unit = inst->Src[1].Register.Index;
1645       struct etna_inst ins[2] = { };
1646       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1647 
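      /* Sketch: rescale the unnormalized coordinates with per-unit scale
       * uniforms (presumably 1/width and 1/height, filled in from the bound
       * texture):
       *   MUL t.x, src, u[TEXRECT_SCALE_X for unit]
       *   MUL t.y, src, u[TEXRECT_SCALE_Y for unit]
       */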
1648       ins[0].opcode = INST_OPCODE_MUL;
1649       ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
1650       ins[0].src[0] = src[0];
1651       ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);
1652 
1653       ins[1].opcode = INST_OPCODE_MUL;
1654       ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
1655       ins[1].src[0] = src[0];
1656       ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);
1657 
1658       emit_inst(c, &ins[0]);
1659       emit_inst(c, &ins[1]);
1660 
1661       src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
1662    }
1663 
1664    switch (inst->Instruction.Opcode) {
1665    case TGSI_OPCODE_TEX:
1666       emit_inst(c, &(struct etna_inst) {
1667          .opcode = INST_OPCODE_TEXLD,
1668          .sat = 0,
1669          .dst = convert_dst(c, &inst->Dst[0]),
1670          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1671          .src[0] = src[0],
1672       });
1673       break;
1674 
1675    case TGSI_OPCODE_TXB:
1676       emit_inst(c, &(struct etna_inst) {
1677          .opcode = INST_OPCODE_TEXLDB,
1678          .sat = 0,
1679          .dst = convert_dst(c, &inst->Dst[0]),
1680          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1681          .src[0] = src[0],
1682       });
1683       break;
1684 
1685    case TGSI_OPCODE_TXL:
1686       emit_inst(c, &(struct etna_inst) {
1687          .opcode = INST_OPCODE_TEXLDL,
1688          .sat = 0,
1689          .dst = convert_dst(c, &inst->Dst[0]),
1690          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1691          .src[0] = src[0],
1692       });
1693       break;
1694 
1695    case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
1696       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1697 
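      /* Sketch of the projective divide emitted below:
       *   RCP   tmp.w,   src.wwww
       *   MUL   tmp.xyz, tmp.wwww, src
       *   TEXLD dst,     tmp
       */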
1698       emit_inst(c, &(struct etna_inst) {
1699          .opcode = INST_OPCODE_RCP,
1700          .sat = 0,
1701          .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
1702          .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
1703       });
1704       emit_inst(c, &(struct etna_inst) {
1705          .opcode = INST_OPCODE_MUL,
1706          .sat = 0,
1707          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1708                                          INST_COMPS_Z), /* tmp.xyz */
1709          .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
1710          .src[1] = src[0], /* src.xyzw */
1711       });
1712       emit_inst(c, &(struct etna_inst) {
1713          .opcode = INST_OPCODE_TEXLD,
1714          .sat = 0,
1715          .dst = convert_dst(c, &inst->Dst[0]),
1716          .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
1717          .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
1718       });
1719    } break;
1720 
1721    default:
1722       BUG("Unhandled instruction %s",
1723           tgsi_get_opcode_name(inst->Instruction.Opcode));
1724       assert(0);
1725       break;
1726    }
1727 }
1728 
1729 static void
1730 trans_dummy(const struct instr_translater *t, struct etna_compile *c,
1731             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1732 {
1733    /* nothing to do */
1734 }
1735 
1736 static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
1737 #define INSTR(n, f, ...) \
1738    [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}
1739 
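   /* For example, INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1})
    * expands to roughly:
    *   [TGSI_OPCODE_MOV] = { .fxn = trans_instr, .tgsi_opc = TGSI_OPCODE_MOV,
    *                         .opc = INST_OPCODE_MOV, .src = {2, -1, -1} }
    * where .src appears to map each TGSI source operand to a hardware source
    * slot (-1 = unused); single-operand ALU ops take their argument in slot 2.
    */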
1740    INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
1741    INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
1742    INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
1743    INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
1744    INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
1745    INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
1746    INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
1747    INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
1748    INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
1749    INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
1750    INSTR(LG2, trans_instr, .opc = INST_OPCODE_LOG, .src = {2, -1, -1}),
1751    INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
1752    INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
1753    INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
1754    INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
1755    INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),
1756 
1757    INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
1758    INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),
1759 
1760    INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
1761    INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),
1762 
1763    INSTR(IF, trans_if),
1764    INSTR(ELSE, trans_else),
1765    INSTR(ENDIF, trans_endif),
1766 
1767    INSTR(BGNLOOP, trans_loop_bgn),
1768    INSTR(ENDLOOP, trans_loop_end),
1769    INSTR(BRK, trans_brk),
1770    INSTR(CONT, trans_cont),
1771 
1772    INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
1773    INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),
1774 
1775    INSTR(ARL, trans_arl),
1776    INSTR(LRP, trans_lrp),
1777    INSTR(LIT, trans_lit),
1778    INSTR(SSG, trans_ssg),
1779    INSTR(DPH, trans_dph),
1780 
1781    INSTR(SIN, trans_trig),
1782    INSTR(COS, trans_trig),
1783    INSTR(SCS, trans_trig),
1784 
1785    INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
1786    INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
1787    INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
1788    INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
1789    INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
1790    INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),
1791 
1792    INSTR(TEX, trans_sampler),
1793    INSTR(TXB, trans_sampler),
1794    INSTR(TXL, trans_sampler),
1795    INSTR(TXP, trans_sampler),
1796 
1797    INSTR(NOP, trans_dummy),
1798    INSTR(END, trans_dummy),
1799 };
1800 
1801 /* Pass -- compile instructions */
1802 static void
1803 etna_compile_pass_generate_code(struct etna_compile *c)
1804 {
1805    struct tgsi_parse_context ctx = { };
1806    unsigned status = tgsi_parse_init(&ctx, c->tokens);
1807    assert(status == TGSI_PARSE_OK);
1808 
1809    int inst_idx = 0;
1810    while (!tgsi_parse_end_of_tokens(&ctx)) {
1811       const struct tgsi_full_instruction *inst = 0;
1812 
1813       /* No inner temps used yet for this instruction, clear counter */
1814       c->inner_temps = 0;
1815 
1816       tgsi_parse_token(&ctx);
1817 
1818       switch (ctx.FullToken.Token.Type) {
1819       case TGSI_TOKEN_TYPE_INSTRUCTION:
1820          /* iterate over operands */
1821          inst = &ctx.FullToken.FullInstruction;
1822          if (c->dead_inst[inst_idx]) { /* skip dead instructions */
1823             inst_idx++;
1824             continue;
1825          }
1826 
1827          /* Lookup the TGSI information and generate the source arguments */
1828          struct etna_inst_src src[ETNA_NUM_SRC];
1829          memset(src, 0, sizeof(src));
1830 
1831          const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
1832 
1833          for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
1834             const struct tgsi_full_src_register *reg = &inst->Src[i];
1835             const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;
1836 
1837             if (!n->valid || n->is_tex)
1838                continue;
1839 
1840             src[i] = etna_create_src(reg, n);
1841          }
1842 
1843          const unsigned opc = inst->Instruction.Opcode;
1844          const struct instr_translater *t = &translaters[opc];
1845 
1846          if (t->fxn) {
1847             t->fxn(t, c, inst, src);
1848 
1849             inst_idx += 1;
1850          } else {
1851             BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
1852             assert(0);
1853          }
1854          break;
1855       }
1856    }
1857    tgsi_parse_free(&ctx);
1858 }
1859 
1860 /* Look up register by semantic */
1861 static struct etna_reg_desc *
1862 find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1863 {
1864    for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1865       struct etna_reg_desc *reg = &c->file[file].reg[idx];
1866 
1867       if (reg->semantic.Name == name && reg->semantic.Index == index)
1868          return reg;
1869    }
1870 
1871    return NULL; /* not found */
1872 }
1873 
1874 /** Add an ADD and a MUL instruction that remap Z as z = (z + w) / 2 when:
1875  * - this is a vertex shader
1876  * - and this is an older GPU that expects Z in 0..1 instead of -1..1
1877  */
1878 static void
1879 etna_compile_add_z_div_if_needed(struct etna_compile *c)
1880 {
1881    if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
1882       /* find position out */
1883       struct etna_reg_desc *pos_reg =
1884          find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
1885 
1886       if (pos_reg != NULL) {
1887          /*
1888           * ADD tX.__z_, tX.zzzz, void, tX.wwww
1889           * MUL tX.__z_, tX.zzzz, 0.5, void
1890          */
1891          emit_inst(c, &(struct etna_inst) {
1892             .opcode = INST_OPCODE_ADD,
1893             .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1894             .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1895             .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
1896          });
1897          emit_inst(c, &(struct etna_inst) {
1898             .opcode = INST_OPCODE_MUL,
1899             .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1900             .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1901             .src[1] = alloc_imm_f32(c, 0.5f),
1902          });
1903       }
1904    }
1905 }
1906 
1907 /** add a NOP to the shader if
1908  * a) the shader is empty
1909  * or
1910  * b) there is a label at the end of the shader
1911  */
1912 static void
1913 etna_compile_add_nop_if_needed(struct etna_compile *c)
1914 {
1915    bool label_at_last_inst = false;
1916 
1917    for (int idx = 0; idx < c->labels_count; ++idx) {
1918       if (c->labels[idx].inst_idx == c->inst_ptr)
1919          label_at_last_inst = true;
1921    }
1922 
1923    if (c->inst_ptr == 0 || label_at_last_inst)
1924       emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
1925 }
1926 
1927 static void
1928 assign_uniforms(struct etna_compile_file *file, unsigned base)
1929 {
1930    for (int idx = 0; idx < file->reg_size; ++idx) {
1931       file->reg[idx].native.valid = 1;
1932       file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
1933       file->reg[idx].native.id = base + idx;
1934    }
1935 }
1936 
1937 /* Allocate CONST and IMM registers to native ETNA_RGROUP_UNIFORM(x).
1938  * CONST registers must be consecutive, because constant buffers are laid
1939  * out consecutively, and they must come before IMM: the compilation
1940  * process itself can generate extra immediates for constants such as pi,
1941  * one and zero, and appending those after the constants keeps the
1942  * constant layout stable.
1943  */
1944 static void
1945 assign_constants_and_immediates(struct etna_compile *c)
1946 {
1947    assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
1948    /* immediates start after the constants */
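   /* note: imm_base and imm_size are counted in scalar (32-bit) components,
    * while reg_size counts vec4 registers, hence the factor of 4 */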
1949    c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
1950    assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
1951    DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
1952          c->imm_size);
1953 }
1954 
1955 /* Assign declared samplers to native texture units */
1956 static void
1957 assign_texture_units(struct etna_compile *c)
1958 {
1959    uint tex_base = 0;
1960 
1961    if (c->info.processor == PIPE_SHADER_VERTEX)
1962       tex_base = c->specs->vertex_sampler_offset;
1963 
1964    for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
1965       c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
1966       c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
1967       c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
1968    }
1969 }
1970 
1971 /* Additional pass to fill in branch targets. This pass must run last,
1972  * as instructions can no longer be reordered, removed or added once the
1973  * branch targets have been computed.
1974  */
1975 static void
1976 etna_compile_fill_in_labels(struct etna_compile *c)
1977 {
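   /* each hardware instruction is 4 words of 32 bits, hence the idx * 4 when
    * patching the branch target immediate into the assembled code */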
1978    for (int idx = 0; idx < c->inst_ptr; ++idx) {
1979       if (c->lbl_usage[idx])
1980          etna_assemble_set_imm(&c->code[idx * 4], c->lbl_usage[idx]->inst_idx);
1981    }
1982 }
1983 
1984 /* compare two etna_native_reg structures, return true if equal */
1985 static bool
1986 cmp_etna_native_reg(const struct etna_native_reg to,
1987                     const struct etna_native_reg from)
1988 {
1989    return to.valid == from.valid && to.is_tex == from.is_tex &&
1990           to.rgroup == from.rgroup && to.id == from.id;
1991 }
1992 
1993 /* go through all declarations and swap native registers *to* and *from* */
1994 static void
1995 swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
1996                       const struct etna_native_reg from)
1997 {
1998    if (cmp_etna_native_reg(from, to))
1999       return; /* Nothing to do */
2000 
2001    for (int idx = 0; idx < c->total_decls; ++idx) {
2002       if (cmp_etna_native_reg(c->decl[idx].native, from)) {
2003          c->decl[idx].native = to;
2004       } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
2005          c->decl[idx].native = from;
2006       }
2007    }
2008 }
2009 
2010 /* For PS we need to permute so that inputs are always in temporary 0..N-1.
2011  * Semantic POS is always t0. If that semantic is not used, avoid t0.
2012  */
2013 static void
2014 permute_ps_inputs(struct etna_compile *c)
2015 {
2016    /* Special inputs:
2017     * gl_FragCoord  VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
2018     * gl_PointCoord VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
2019     */
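   /* Hypothetical example: with inputs { POSITION, TEXCOORD[0], COLOR[0] },
    * POSITION keeps t0 and the loop below assigns TEXCOORD[0] -> t1 and
    * COLOR[0] -> t2, so num_varyings ends up as 2.
    */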
2020    uint native_idx = 1;
2021 
2022    for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2023       struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2024       uint input_id;
2025       assert(reg->has_semantic);
2026 
2027       if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
2028          continue;
2029 
2030       input_id = native_idx++;
2031       swap_native_registers(c, etna_native_temp(input_id),
2032                             c->file[TGSI_FILE_INPUT].reg[idx].native);
2033    }
2034 
2035    c->num_varyings = native_idx - 1;
2036 
2037    if (native_idx > c->next_free_native)
2038       c->next_free_native = native_idx;
2039 }
2040 
2041 /* fill in ps inputs into shader object */
2042 static void
2043 fill_in_ps_inputs(struct etna_shader *sobj, struct etna_compile *c)
2044 {
2045    struct etna_shader_io_file *sf = &sobj->infile;
2046 
2047    sf->num_reg = 0;
2048 
2049    for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2050       struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2051 
2052       if (reg->native.id > 0) {
2053          assert(sf->num_reg < ETNA_NUM_INPUTS);
2054          sf->reg[sf->num_reg].reg = reg->native.id;
2055          sf->reg[sf->num_reg].semantic = reg->semantic;
2056          /* convert usage mask to number of components (*=wildcard)
2057           *   .r    (0..1)  -> 1 component
2058           *   .*g   (2..3)  -> 2 components
2059           *   .**b  (4..7)  -> 3 components
2060           *   .***a (8..15) -> 4 components
2061           */
2062          sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2063          sf->num_reg++;
2064       }
2065    }
2066 
2067    assert(sf->num_reg == c->num_varyings);
2068    sobj->input_count_unk8 = 31; /* XXX what is this */
2069 }
2070 
2071 /* fill in output mapping for ps into shader object */
2072 static void
2073 fill_in_ps_outputs(struct etna_shader *sobj, struct etna_compile *c)
2074 {
2075    sobj->outfile.num_reg = 0;
2076 
2077    for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2078       struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2079 
2080       switch (reg->semantic.Name) {
2081       case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2082          sobj->ps_color_out_reg = reg->native.id;
2083          break;
2084       case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2085          sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2086          break;
2087       default:
2088          assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2089       }
2090    }
2091 }
2092 
2093 /* fill in inputs for vs into shader object */
2094 static void
2095 fill_in_vs_inputs(struct etna_shader *sobj, struct etna_compile *c)
2096 {
2097    struct etna_shader_io_file *sf = &sobj->infile;
2098 
2099    sf->num_reg = 0;
2100    for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2101       struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2102       assert(sf->num_reg < ETNA_NUM_INPUTS);
2103       /* XXX exclude inputs with special semantics such as gl_frontFacing */
2104       sf->reg[sf->num_reg].reg = reg->native.id;
2105       sf->reg[sf->num_reg].semantic = reg->semantic;
2106       sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2107       sf->num_reg++;
2108    }
2109 
2110    sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
2111 }
2112 
2113 /* build two-level output index [Semantic][Index] for fast linking */
2114 static void
2115 build_output_index(struct etna_shader *sobj)
2116 {
2117    int total = 0;
2118    int offset = 0;
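   /* e.g. after this pass, output_per_semantic[TGSI_SEMANTIC_GENERIC][1]
    * points straight at the outfile register declared as GENERIC[1] (if any),
    * so the linker can look up VS outputs without scanning the whole list. */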
2119 
2120    for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
2121       total += sobj->output_count_per_semantic[name];
2122 
2123    sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));
2124 
2125    for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
2126       sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
2127       offset += sobj->output_count_per_semantic[name];
2128    }
2129 
2130    for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
2131       sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
2132                                [sobj->outfile.reg[idx].semantic.Index] =
2133          &sobj->outfile.reg[idx];
2134    }
2135 }
2136 
2137 /* fill in outputs for vs into shader object */
2138 static void
2139 fill_in_vs_outputs(struct etna_shader *sobj, struct etna_compile *c)
2140 {
2141    struct etna_shader_io_file *sf = &sobj->outfile;
2142 
2143    sf->num_reg = 0;
2144    for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2145       struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2146       assert(sf->num_reg < ETNA_NUM_INPUTS);
2147 
2148       switch (reg->semantic.Name) {
2149       case TGSI_SEMANTIC_POSITION:
2150          sobj->vs_pos_out_reg = reg->native.id;
2151          break;
2152       case TGSI_SEMANTIC_PSIZE:
2153          sobj->vs_pointsize_out_reg = reg->native.id;
2154          break;
2155       default:
2156          sf->reg[sf->num_reg].reg = reg->native.id;
2157          sf->reg[sf->num_reg].semantic = reg->semantic;
2158          sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
2159          sf->num_reg++;
2160          sobj->output_count_per_semantic[reg->semantic.Name] =
2161             MAX2(reg->semantic.Index + 1,
2162                  sobj->output_count_per_semantic[reg->semantic.Name]);
2163       }
2164    }
2165 
2166    /* build two-level index for linking */
2167    build_output_index(sobj);
2168 
2169    /* Fill in the "mystery meat" load balancing value. This value determines
2170     * how work is scheduled between VS and PS in the unified shader
2171     * architecture. More precisely, it is derived from the number of VS
2172     * outputs, as well as the chip-specific vertex output buffer size,
2173     * vertex cache size, and number of shader cores.
2174     *
2175     * XXX this is a conservative estimate; the "optimal" value is only
2176     * known for sure at link time, because some outputs may be unused and
2177     * thus unmapped. Then again, in the common case with GLSL the vertex
2178     * and fragment shaders are already linked before being submitted to
2179     * Gallium, so all outputs are used.
2180     */
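   /* Worked example with made-up specs (vertex_output_buffer_size = 512,
    * vertex_cache_size = 16, shader_core_count = 1) and 2 outputs
    * (half_out = 1):
    *   b = ((20480 / (512 - 2 * 1 * 16)) + 9) / 10 = (42 + 9) / 10 = 5
    *   a = (5 + 256 / (1 * 1)) / 2 = 130
    * both well below the 255 clamp applied below.
    */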
2183    int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
2184    assert(half_out);
2185 
2186    uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
2187                            2 * half_out * c->specs->vertex_cache_size)) +
2188                  9) /
2189                 10;
2190    uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
2191    sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
2192                              VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
2193                              VIVS_VS_LOAD_BALANCING_C(0x3f) |
2194                              VIVS_VS_LOAD_BALANCING_D(0x0f);
2195 }
2196 
2197 static bool
2198 etna_compile_check_limits(struct etna_compile *c)
2199 {
2200    int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
2201                          ? c->specs->max_vs_uniforms
2202                          : c->specs->max_ps_uniforms;
2203    /* round up number of uniforms, including immediates, in units of four */
2204    int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
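   /* e.g. 5 constant vec4s (imm_base = 20) plus 6 scalar immediates
    * (imm_size = 6) gives 20 / 4 + (6 + 3) / 4 = 5 + 2 = 7 vec4 uniforms */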
2205 
2206    if (c->inst_ptr > c->specs->max_instructions) {
2207       DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
2208           c->specs->max_instructions);
2209       return false;
2210    }
2211 
2212    if (c->next_free_native > c->specs->max_registers) {
2213       DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
2214           c->specs->max_registers);
2215       return false;
2216    }
2217 
2218    if (num_uniforms > max_uniforms) {
2219       DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
2220           max_uniforms);
2221       return false;
2222    }
2223 
2224    if (c->num_varyings > c->specs->max_varyings) {
2225       DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
2226           c->specs->max_varyings);
2227       return false;
2228    }
2229 
2230    if (c->imm_base > c->specs->num_constants) {
2231       DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
2232           c->specs->num_constants);
2233    }
2234 
2235    return true;
2236 }
2237 
2238 static void
2239 copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader *sobj)
2240 {
2241    uint32_t count = c->imm_size;
2242    struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
2243 
2244    uinfo->const_count = c->imm_base;
2245    uinfo->imm_count = count;
2246    uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data));
2247    uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents));
2248 
2249    etna_set_shader_uniforms_dirty_flags(sobj);
2250 }
2251 
2252 struct etna_shader *
2253 etna_compile_shader(const struct etna_specs *specs,
2254                     const struct tgsi_token *tokens)
2255 {
2256    /* Create scratch space that may be too large to fit on stack
2257     */
2258    bool ret;
2259    struct etna_compile *c;
2260    struct etna_shader *shader;
2261 
2262    struct tgsi_lowering_config lconfig = {
2263       .lower_SCS = specs->has_sin_cos_sqrt,
2264       .lower_FLR = !specs->has_sign_floor_ceil,
2265       .lower_CEIL = !specs->has_sign_floor_ceil,
2266       .lower_POW = true,
2267       .lower_EXP = true,
2268       .lower_LOG = true,
2269       .lower_DP2 = true,
2270       .lower_DP2A = true,
2271       .lower_TRUNC = true,
2272       .lower_XPD = true
2273    };
2274 
2275    c = CALLOC_STRUCT(etna_compile);
2276    if (!c)
2277       return NULL;
2278 
2279    shader = CALLOC_STRUCT(etna_shader);
2280    if (!shader)
2281       goto out;
2282 
2283    c->specs = specs;
2284    c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
2285    c->free_tokens = !!c->tokens;
2286    if (!c->tokens) {
2287       /* no lowering */
2288       c->tokens = tokens;
2289    }
2290 
2291    /* Build a map from gallium register to native registers for files
2292     * CONST, SAMP, IMM, OUT, IN, TEMP.
2293     * SAMP maps as-is for fragment shaders; for vertex shaders there is a
2294     * +8 offset (the vertex sampler offset).
2295     */
2296    /* Pass one -- check register file declarations and immediates */
2297    etna_compile_parse_declarations(c);
2298 
2299    etna_allocate_decls(c);
2300 
2301    /* Pass two -- check usage of temporaries, inputs, outputs */
2302    etna_compile_pass_check_usage(c);
2303 
2304    assign_special_inputs(c);
2305 
2306    /* Assign native temp register to TEMPs */
2307    assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);
2308 
2309    /* optimize outputs */
2310    etna_compile_pass_optimize_outputs(c);
2311 
2312    /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
2313     *     this is part of RGROUP_INTERNAL
2314     */
2315 
2316    /* assign inputs: last usage of input should be <= first usage of temp */
2317    /*   potential optimization case:
2318     *     if there is a single MOV TEMP[y], IN[x] before which temp y is not
2319     *     used and after which IN[x] is not read, temp[y] can be used as the
2320     *     input register as-is
2321     */
2322    /*   sort temporaries by first use
2323     *   sort inputs by last usage
2324     *   iterate over inputs, temporaries
2325     *     if last usage of input <= first usage of temp:
2326     *       assign input to temp
2327     *       advance input, temporary pointer
2328     *     else
2329     *       advance temporary pointer
2330     *
2331     *   potential problem: an instruction with multiple inputs, of which one
2332     *      is the temp and the other is the input; however, as the temp is
2333     *      not used before this point, how would that make sense?
2334     *      uninitialized temporaries have an undefined value, so this would
2335     *      be ok
2336     */
2337    assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);
2338 
2339    /* assign outputs: first usage of output should be >= last usage of temp */
2340    /*   potential optimization case:
2341     *      if there is a single MOV OUT[x], TEMP[y] (with a full write mask,
2342     *      or at least writing all components that are used in the shader)
2343     *      after which temp y is no longer used, temp[y] can be used as the
2344     *      output register as-is
2345     *
2346     *   potential problem: an instruction with multiple outputs, of which one
2347     *      is the temp and the other is the output; however, as the temp is
2348     *      not used after this point, how would that make sense? the output
2349     *      value could simply be discarded
2350     */
2351    /*   sort temporaries by last use
2352     *   sort outputs by first usage
2353     *   iterate over outputs, temporaries
2354     *     if first usage of output >= last usage of temp:
2355     *       assign output to temp
2356     *       advance output, temporary pointer
2357     *     else
2358     *       advance temporary pointer
2359     */
2360    assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);
2361 
2362    assign_constants_and_immediates(c);
2363    assign_texture_units(c);
2364 
2365    /* list declarations */
2366    for (int x = 0; x < c->total_decls; ++x) {
2367       DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2368                                     "last_use=%i native=%i usage_mask=%x "
2369                                     "has_semantic=%i",
2370             x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2371             c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2372             c->decl[x].native.valid ? c->decl[x].native.id : -1,
2373             c->decl[x].usage_mask, c->decl[x].has_semantic);
2374       if (c->decl[x].has_semantic)
2375          DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2376                tgsi_semantic_names[c->decl[x].semantic.Name],
2377                c->decl[x].semantic.Index);
2378    }
2379    /* XXX for PS we need to permute so that inputs are always in temporaries
2380     * 0..N-1. There is no "switchboard" for varyings (AFAIK!). The output
2381     * color, however, can be routed from an arbitrary temporary.
2382     */
2385    if (c->info.processor == PIPE_SHADER_FRAGMENT)
2386       permute_ps_inputs(c);
2387 
2389    /* list declarations */
2390    for (int x = 0; x < c->total_decls; ++x) {
2391       DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2392                                     "last_use=%i native=%i usage_mask=%x "
2393                                     "has_semantic=%i",
2394             x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2395             c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2396             c->decl[x].native.valid ? c->decl[x].native.id : -1,
2397             c->decl[x].usage_mask, c->decl[x].has_semantic);
2398       if (c->decl[x].has_semantic)
2399          DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2400                tgsi_semantic_names[c->decl[x].semantic.Name],
2401                c->decl[x].semantic.Index);
2402    }
2403 
2404    /* pass 3: generate instructions */
2405    etna_compile_pass_generate_code(c);
2406    etna_compile_add_z_div_if_needed(c);
2407    etna_compile_add_nop_if_needed(c);
2408    etna_compile_fill_in_labels(c);
2409 
2410    ret = etna_compile_check_limits(c);
2411    if (!ret) {
2412       FREE(shader);
2413       shader = NULL;
2414       goto out;
2415    }
2416 
2417    /* fill in output structure */
2418    shader->processor = c->info.processor;
2419    shader->code_size = c->inst_ptr * 4;
2420    shader->code = mem_dup(c->code, c->inst_ptr * 16);
2421    shader->num_temps = c->next_free_native;
2422    shader->vs_pos_out_reg = -1;
2423    shader->vs_pointsize_out_reg = -1;
2424    shader->ps_color_out_reg = -1;
2425    shader->ps_depth_out_reg = -1;
2426    copy_uniform_state_to_shader(c, shader);
2427 
2428    if (c->info.processor == PIPE_SHADER_VERTEX) {
2429       fill_in_vs_inputs(shader, c);
2430       fill_in_vs_outputs(shader, c);
2431    } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
2432       fill_in_ps_inputs(shader, c);
2433       fill_in_ps_outputs(shader, c);
2434    }
2435 
2436 out:
2437    if (c->free_tokens)
2438       FREE((void *)c->tokens);
2439 
2440    FREE(c->labels);
2441    FREE(c);
2442 
2443    return shader;
2444 }
2445 
2446 extern const char *tgsi_swizzle_names[];
2447 void
2448 etna_dump_shader(const struct etna_shader *shader)
2449 {
2450    if (shader->processor == PIPE_SHADER_VERTEX)
2451       printf("VERT\n");
2452    else
2453       printf("FRAG\n");
2454 
2456    etna_disasm(shader->code, shader->code_size, PRINT_RAW);
2457 
2458    printf("num temps: %i\n", shader->num_temps);
2459    printf("num const: %i\n", shader->uniforms.const_count);
2460    printf("immediates:\n");
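   /* immediates are stored per scalar component; entry idx is printed as
    * uniform vec4 (idx + const_count) / 4, component idx % 4 */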
2461    for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
2462       printf(" [%i].%s = %f (0x%08x)\n",
2463              (idx + shader->uniforms.const_count) / 4,
2464              tgsi_swizzle_names[idx % 4],
2465              *((float *)&shader->uniforms.imm_data[idx]),
2466              shader->uniforms.imm_data[idx]);
2467    }
2468    printf("inputs:\n");
2469    for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
2470       printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
2471              tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
2472              shader->infile.reg[idx].semantic.Index,
2473              shader->infile.reg[idx].num_components);
2474    }
2475    printf("outputs:\n");
2476    for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
2477       printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
2478              tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
2479              shader->outfile.reg[idx].semantic.Index,
2480              shader->outfile.reg[idx].num_components);
2481    }
2482    printf("special:\n");
2483    if (shader->processor == PIPE_SHADER_VERTEX) {
2484       printf("  vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
2485       printf("  vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
2486       printf("  vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
2487    } else {
2488       printf("  ps_color_out_reg=%i\n", shader->ps_color_out_reg);
2489       printf("  ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
2490    }
2491    printf("  input_count_unk8=0x%08x\n", shader->input_count_unk8);
2492 }
2493 
2494 void
2495 etna_destroy_shader(struct etna_shader *shader)
2496 {
2497    assert(shader);
2498 
2499    FREE(shader->code);
2500    FREE(shader->uniforms.imm_data);
2501    FREE(shader->uniforms.imm_contents);
2502    FREE(shader->output_per_semantic_list);
2503    FREE(shader);
2504 }
2505 
2506 static const struct etna_shader_inout *
2507 etna_shader_vs_lookup(const struct etna_shader *sobj,
2508                       const struct etna_shader_inout *in)
2509 {
2510    if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
2511       return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];
2512 
2513    return NULL;
2514 }
2515 
2516 bool
2517 etna_link_shader(struct etna_shader_link_info *info,
2518                  const struct etna_shader *vs, const struct etna_shader *fs)
2519 {
2520    /* For each fragment input we need to find the associated vertex shader
2521     * output, which can be found by matching on semantic name and index. A
2522     * binary search could be used because the vs outputs are sorted by their
2523     * semantic index and grouped by semantic type by fill_in_vs_outputs.
2524     */
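   /* Rough picture of what is filled in below: for each FS input register r
    * (1-based), varyings[r - 1] records which VS output register feeds it,
    * how many components it uses, and per-component usage flags that end up
    * in the varying setup state; PCOORD varyings are replaced by the point
    * coordinate instead of a VS output.
    */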
2525    assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
2526 
2527    for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
2528       const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
2529       const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
2530       struct etna_varying *varying;
2531 
2532       assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
2533 
2534       if (fsio->reg > info->num_varyings)
2535          info->num_varyings = fsio->reg;
2536 
2537       varying = &info->varyings[fsio->reg - 1];
2538       varying->num_components = fsio->num_components;
2539 
2540       if (fsio->semantic.Name == TGSI_SEMANTIC_COLOR) /* colors affected by flat shading */
2541          varying->pa_attributes = 0x200;
2542       else /* texture coord or other bypasses flat shading */
2543          varying->pa_attributes = 0x2f1;
2544 
2545       if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
2546          varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
2547          varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
2548          varying->use[2] = VARYING_COMPONENT_USE_USED;
2549          varying->use[3] = VARYING_COMPONENT_USE_USED;
2550          varying->reg = 0; /* replaced by point coord -- doesn't matter */
2551          continue;
2552       }
2553 
2554       if (vsio == NULL)
2555          return true; /* not found -- link error */
2556 
2557       varying->use[0] = VARYING_COMPONENT_USE_USED;
2558       varying->use[1] = VARYING_COMPONENT_USE_USED;
2559       varying->use[2] = VARYING_COMPONENT_USE_USED;
2560       varying->use[3] = VARYING_COMPONENT_USE_USED;
2561       varying->reg = vsio->reg;
2562    }
2563 
2564    assert(info->num_varyings == fs->infile.num_reg);
2565 
2566    return false;
2567 }
2568