1 /*
2  * Copyright (c) 2020 Etnaviv Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Jonathan Marek <jonathan@marek.ca>
25  */
26 
27 #ifndef H_ETNAVIV_COMPILER_NIR
28 #define H_ETNAVIV_COMPILER_NIR
29 
30 #include "compiler/nir/nir.h"
31 #include "etnaviv_asm.h"
32 #include "etnaviv_compiler.h"
33 
34 struct etna_compile {
35    nir_shader *nir;
36    nir_function_impl *impl;
37 #define is_fs(c) ((c)->nir->info.stage == MESA_SHADER_FRAGMENT)
38    const struct etna_specs *specs;
39    struct etna_shader_variant *variant;
40 
41    /* block # to instr index */
42    unsigned *block_ptr;
43 
44    /* Code generation */
45    int inst_ptr; /* current instruction pointer */
46    struct etna_inst code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];
47 
48    /* constants */
49    uint64_t consts[ETNA_MAX_IMM];
50    unsigned const_count;
51 
52    /* ra state */
53    struct ra_graph *g;
54    unsigned *live_map;
55    unsigned num_nodes;
56 
57    /* There was an error during compilation */
58    bool error;
59 };
60 
/* Report a compilation failure.
 *
 * Prints the printf-style message given after `ctx`, sets `ctx->error`, and
 * then hits assert(0), which aborts unless assertions are compiled out.
 *
 * `ctx` may be any expression yielding a pointer to a structure with a
 * bool-compatible `error` member (e.g. `c` or `&local`); it is evaluated
 * exactly once. At least a format string must be supplied.
 */
#define compile_error(ctx, ...) ({ \
   printf(__VA_ARGS__); \
   (ctx)->error = true; \
   assert(0); \
})
66 
/* Bit flags OR-ed into nir_instr::pass_flags by real_dest(). */
enum {
   /* the instruction's own destination is skipped (set on a vecN/mov user) */
   BYPASS_DST = 1 << 0,
   /* the instruction is skipped as a source (set on the producing mov) */
   BYPASS_SRC = 1 << 1,
};
71 
/* Return true if `instr` is a load_front_face or load_frag_coord intrinsic;
 * any other instruction (intrinsic or not) yields false.
 */
static inline bool is_sysval(nir_instr *instr)
{
   if (instr->type == nir_instr_type_intrinsic) {
      switch (nir_instr_as_intrinsic(instr)->intrinsic) {
      case nir_intrinsic_load_front_face:
      case nir_intrinsic_load_frag_coord:
         return true;
      default:
         break;
      }
   }

   return false;
}
81 
82 /* get unique ssa/reg index for nir_src */
83 static inline unsigned
src_index(nir_function_impl * impl,nir_src * src)84 src_index(nir_function_impl *impl, nir_src *src)
85 {
86    return src->is_ssa ? src->ssa->index : (src->reg.reg->index + impl->ssa_alloc);
87 }
88 
89 /* get unique ssa/reg index for nir_dest */
90 static inline unsigned
dest_index(nir_function_impl * impl,nir_dest * dest)91 dest_index(nir_function_impl *impl, nir_dest *dest)
92 {
93    return dest->is_ssa ? dest->ssa.index : (dest->reg.reg->index + impl->ssa_alloc);
94 }
95 
96 static inline void
update_swiz_mask(nir_alu_instr * alu,nir_dest * dest,unsigned * swiz,unsigned * mask)97 update_swiz_mask(nir_alu_instr *alu, nir_dest *dest, unsigned *swiz, unsigned *mask)
98 {
99    if (!swiz)
100       return;
101 
102    bool is_vec = dest != NULL;
103    unsigned swizzle = 0, write_mask = 0;
104    for (unsigned i = 0; i < 4; i++) {
105       /* channel not written */
106       if (!(alu->dest.write_mask & (1 << i)))
107          continue;
108       /* src is different (only check for vecN) */
109       if (is_vec && alu->src[i].src.ssa != &dest->ssa)
110          continue;
111 
112       unsigned src_swiz = is_vec ? alu->src[i].swizzle[0] : alu->src[0].swizzle[i];
113       swizzle |= (*swiz >> src_swiz * 2 & 3) << i * 2;
114       /* this channel isn't written through this chain */
115       if (*mask & (1 << src_swiz))
116          write_mask |= 1 << i;
117    }
118    *swiz = swizzle;
119    *mask = write_mask;
120 }
121 
122 static nir_dest *
real_dest(nir_dest * dest,unsigned * swiz,unsigned * mask)123 real_dest(nir_dest *dest, unsigned *swiz, unsigned *mask)
124 {
125    if (!dest || !dest->is_ssa)
126       return dest;
127 
128    bool can_bypass_src = !list_length(&dest->ssa.if_uses);
129    nir_instr *p_instr = dest->ssa.parent_instr;
130 
131    /* if used by a vecN, the "real" destination becomes the vecN destination
132     * lower_alu guarantees that values used by a vecN are only used by that vecN
133     * we can apply the same logic to movs in a some cases too
134     */
135    nir_foreach_use(use_src, &dest->ssa) {
136       nir_instr *instr = use_src->parent_instr;
137 
138       /* src bypass check: for now only deal with tex src mov case
139        * note: for alu don't bypass mov for multiple uniform sources
140        */
141       switch (instr->type) {
142       case nir_instr_type_tex:
143          if (p_instr->type == nir_instr_type_alu &&
144              nir_instr_as_alu(p_instr)->op == nir_op_mov) {
145             break;
146          }
147          /* fallthrough */
148       default:
149          can_bypass_src = false;
150          break;
151       }
152 
153       if (instr->type != nir_instr_type_alu)
154          continue;
155 
156       nir_alu_instr *alu = nir_instr_as_alu(instr);
157 
158       switch (alu->op) {
159       case nir_op_vec2:
160       case nir_op_vec3:
161       case nir_op_vec4:
162          assert(list_length(&dest->ssa.if_uses) == 0);
163          nir_foreach_use(use_src, &dest->ssa)
164             assert(use_src->parent_instr == instr);
165 
166          update_swiz_mask(alu, dest, swiz, mask);
167          break;
168       case nir_op_mov: {
169          switch (dest->ssa.parent_instr->type) {
170          case nir_instr_type_alu:
171          case nir_instr_type_tex:
172             break;
173          default:
174             continue;
175          }
176          if (list_length(&dest->ssa.if_uses) || list_length(&dest->ssa.uses) > 1)
177             continue;
178 
179          update_swiz_mask(alu, NULL, swiz, mask);
180          break;
181       };
182       default:
183          continue;
184       }
185 
186       assert(!(instr->pass_flags & BYPASS_SRC));
187       instr->pass_flags |= BYPASS_DST;
188       return real_dest(&alu->dest.dest, swiz, mask);
189    }
190 
191    if (can_bypass_src && !(p_instr->pass_flags & BYPASS_DST)) {
192       p_instr->pass_flags |= BYPASS_SRC;
193       return NULL;
194    }
195 
196    return dest;
197 }
198 
199 /* if instruction dest needs a register, return nir_dest for it */
200 static inline nir_dest *
dest_for_instr(nir_instr * instr)201 dest_for_instr(nir_instr *instr)
202 {
203    nir_dest *dest = NULL;
204 
205    switch (instr->type) {
206    case nir_instr_type_alu:
207       dest = &nir_instr_as_alu(instr)->dest.dest;
208       break;
209    case nir_instr_type_tex:
210       dest = &nir_instr_as_tex(instr)->dest;
211       break;
212    case nir_instr_type_intrinsic: {
213       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
214       if (intr->intrinsic == nir_intrinsic_load_uniform ||
215           intr->intrinsic == nir_intrinsic_load_ubo ||
216           intr->intrinsic == nir_intrinsic_load_input ||
217           intr->intrinsic == nir_intrinsic_load_instance_id)
218          dest = &intr->dest;
219    } break;
220    case nir_instr_type_deref:
221       return NULL;
222    default:
223       break;
224    }
225    return real_dest(dest, NULL, NULL);
226 }
227 
228 struct live_def {
229    nir_instr *instr;
230    nir_dest *dest; /* cached dest_for_instr */
231    unsigned live_start, live_end; /* live range */
232 };
233 
234 unsigned
235 etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map);
236 
/* Register classes.
 *
 * Swizzles and write masks make it possible to layer virtual,
 * non-interfering registers on top of the real VEC4 registers: e.g. the
 * virtual VEC3_XYZ register and the virtual SCALAR_W register sharing one
 * physical VEC4 base register do not interfere.
 */
enum reg_class {
   /* general-purpose classes, by component count */
   REG_CLASS_VIRT_SCALAR,
   REG_CLASS_VIRT_VEC2,
   REG_CLASS_VIRT_VEC3,
   REG_CLASS_VEC4,

   /* special vec2 class for fast transcendentals, limited to XY or ZW */
   REG_CLASS_VIRT_VEC2T,

   /* special classes for LOAD - contiguous components */
   REG_CLASS_VIRT_VEC2C,
   REG_CLASS_VIRT_VEC3C,

   /* number of classes; keep last */
   NUM_REG_CLASSES,
};
254 
/* Register types: the full VEC4 plus every virtual sub-register layered on
 * it. The suffix names the components the type covers (see reg_writemask).
 * reg_get_type()/reg_get_base() split a virtual register number into one of
 * these types and a base register using NUM_REG_TYPES, so the order and
 * count of the entries matter.
 */
enum reg_type {
   /* whole register */
   REG_TYPE_VEC4,

   /* three components */
   REG_TYPE_VIRT_VEC3_XYZ,
   REG_TYPE_VIRT_VEC3_XYW,
   REG_TYPE_VIRT_VEC3_XZW,
   REG_TYPE_VIRT_VEC3_YZW,

   /* two components */
   REG_TYPE_VIRT_VEC2_XY,
   REG_TYPE_VIRT_VEC2_XZ,
   REG_TYPE_VIRT_VEC2_XW,
   REG_TYPE_VIRT_VEC2_YZ,
   REG_TYPE_VIRT_VEC2_YW,
   REG_TYPE_VIRT_VEC2_ZW,

   /* single component */
   REG_TYPE_VIRT_SCALAR_X,
   REG_TYPE_VIRT_SCALAR_Y,
   REG_TYPE_VIRT_SCALAR_Z,
   REG_TYPE_VIRT_SCALAR_W,

   /* VEC2T variants: XY or ZW only */
   REG_TYPE_VIRT_VEC2T_XY,
   REG_TYPE_VIRT_VEC2T_ZW,

   /* VEC2C / VEC3C variants: contiguous components */
   REG_TYPE_VIRT_VEC2C_XY,
   REG_TYPE_VIRT_VEC2C_YZ,
   REG_TYPE_VIRT_VEC2C_ZW,
   REG_TYPE_VIRT_VEC3C_XYZ,
   REG_TYPE_VIRT_VEC3C_YZW,

   /* number of types; keep last */
   NUM_REG_TYPES,
};
280 
281 /* writemask when used as dest */
282 static const uint8_t
283 reg_writemask[NUM_REG_TYPES] = {
284    [REG_TYPE_VEC4] = 0xf,
285    [REG_TYPE_VIRT_SCALAR_X] = 0x1,
286    [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
287    [REG_TYPE_VIRT_VEC2_XY] = 0x3,
288    [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
289    [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
290    [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
291    [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
292    [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
293    [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
294    [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
295    [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
296    [REG_TYPE_VIRT_SCALAR_W] = 0x8,
297    [REG_TYPE_VIRT_VEC2_XW] = 0x9,
298    [REG_TYPE_VIRT_VEC2_YW] = 0xa,
299    [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
300    [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
301    [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
302    [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
303    [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
304    [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
305    [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
306 };
307 
reg_get_type(int virt_reg)308 static inline int reg_get_type(int virt_reg)
309 {
310    return virt_reg % NUM_REG_TYPES;
311 }
312 
reg_get_base(struct etna_compile * c,int virt_reg)313 static inline int reg_get_base(struct etna_compile *c, int virt_reg)
314 {
315    /* offset by 1 to avoid reserved position register */
316    if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
317       return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
318    return virt_reg / NUM_REG_TYPES;
319 }
320 
321 struct ra_regs *
322 etna_ra_setup(void *mem_ctx);
323 
324 void
325 etna_ra_assign(struct etna_compile *c, nir_shader *shader);
326 
327 unsigned
328 etna_ra_finish(struct etna_compile *c);
329 
330 static inline void
emit_inst(struct etna_compile * c,struct etna_inst * inst)331 emit_inst(struct etna_compile *c, struct etna_inst *inst)
332 {
333    c->code[c->inst_ptr++] = *inst;
334 }
335 
336 void
337 etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst,
338               struct etna_inst_src src[3], bool saturate);
339 
340 void
341 etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz,
342               struct etna_inst_dst dst, struct etna_inst_src coord,
343               struct etna_inst_src lod_bias, struct etna_inst_src compare);
344 
345 void
346 etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition);
347 
348 void
349 etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition);
350 
351 #endif
352