/*
 * Copyright © 2016-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"
#include "util/u_prim.h"
#include "compiler/nir/nir_schedule.h"

int
vir_get_nsrc(struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return 0;
        case V3D_QPU_INSTR_TYPE_ALU:
                if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
                        return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
                else
                        return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
        }

        return 0;
}

/**
 * Returns whether the instruction has any side effects that must be
 * preserved.
 */
bool
vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return true;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->qpu.alu.add.op) {
                case V3D_QPU_A_SETREVF:
                case V3D_QPU_A_SETMSF:
                case V3D_QPU_A_VPMSETUP:
                case V3D_QPU_A_STVPMV:
                case V3D_QPU_A_STVPMD:
                case V3D_QPU_A_STVPMP:
                case V3D_QPU_A_VPMWT:
                case V3D_QPU_A_TMUWT:
                        return true;
                default:
                        break;
                }

                switch (inst->qpu.alu.mul.op) {
                case V3D_QPU_M_MULTOP:
                        return true;
                default:
                        break;
                }
        }

        if (inst->qpu.sig.ldtmu ||
            inst->qpu.sig.ldvary ||
            inst->qpu.sig.ldtlbu ||
            inst->qpu.sig.ldtlb ||
            inst->qpu.sig.wrtmuc ||
            inst->qpu.sig.thrsw) {
                return true;
        }

        return false;
}

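/**
 * Returns whether the instruction is a raw MOV: a register-to-register
 * FMOV/MOV with no pack, unpack, or condition applied, so the source can
 * stand in for the destination (e.g. for copy propagation).
 */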
bool
vir_is_raw_mov(struct qinst *inst)
{
        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
                return false;
        }

        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
                return false;
        }

        if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
            inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE ||
            inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) {
                return false;
        }

        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
            inst->qpu.flags.mc != V3D_QPU_COND_NONE)
                return false;

        return true;
}

bool
vir_is_add(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.add.op != V3D_QPU_A_NOP);
}

bool
vir_is_mul(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
}

bool
vir_is_tex(struct qinst *inst)
{
        if (inst->dst.file == QFILE_MAGIC)
                return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);

        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
                return true;
        }

        return false;
}

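/**
 * Returns whether the instruction implicitly writes the r3 accumulator:
 * VPM reads and, before V3D 4.1, the load signals that return their
 * results in r3.
 */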
bool
vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        for (int i = 0; i < vir_get_nsrc(inst); i++) {
                switch (inst->src[i].file) {
                case QFILE_VPM:
                        return true;
                default:
                        break;
                }
        }

        if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
                                  inst->qpu.sig.ldtlb ||
                                  inst->qpu.sig.ldtlbu ||
                                  inst->qpu.sig.ldvpm)) {
                return true;
        }

        return false;
}

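/**
 * Returns whether the instruction implicitly writes the r4 accumulator:
 * SFU operations and, before V3D 4.1, the ldtmu signal.
 */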
bool
vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        switch (inst->dst.file) {
        case QFILE_MAGIC:
                switch (inst->dst.index) {
                case V3D_QPU_WADDR_RECIP:
                case V3D_QPU_WADDR_RSQRT:
                case V3D_QPU_WADDR_EXP:
                case V3D_QPU_WADDR_LOG:
                case V3D_QPU_WADDR_SIN:
                        return true;
                }
                break;
        default:
                break;
        }

        if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
                return true;

        return false;
}

void
vir_set_unpack(struct qinst *inst, int src,
               enum v3d_qpu_input_unpack unpack)
{
        assert(src == 0 || src == 1);

        if (vir_is_add(inst)) {
                if (src == 0)
                        inst->qpu.alu.add.a_unpack = unpack;
                else
                        inst->qpu.alu.add.b_unpack = unpack;
        } else {
                assert(vir_is_mul(inst));
                if (src == 0)
                        inst->qpu.alu.mul.a_unpack = unpack;
                else
                        inst->qpu.alu.mul.b_unpack = unpack;
        }
}

void
vir_set_pack(struct qinst *inst, enum v3d_qpu_output_pack pack)
{
        if (vir_is_add(inst)) {
                inst->qpu.alu.add.output_pack = pack;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.alu.mul.output_pack = pack;
        }
}

void
vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.ac = cond;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mc = cond;
        }
}

void
vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.apf = pf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mpf = pf;
        }
}

void
vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.auf = uf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.muf = uf;
        }
}

#if 0
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        }
        unreachable("Bad pack field");
}
#endif

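/**
 * Allocates a new temporary, growing the defs and spillable arrays to
 * match.  New temporaries start out marked as spillable.
 */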
struct qreg
vir_get_temp(struct v3d_compile *c)
{
        struct qreg reg;

        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;

        if (c->num_temps > c->defs_array_size) {
                uint32_t old_size = c->defs_array_size;
                c->defs_array_size = MAX2(old_size * 2, 16);

                c->defs = reralloc(c, c->defs, struct qinst *,
                                   c->defs_array_size);
                memset(&c->defs[old_size], 0,
                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));

                c->spillable = reralloc(c, c->spillable,
                                        BITSET_WORD,
                                        BITSET_WORDS(c->defs_array_size));
                for (int i = old_size; i < c->defs_array_size; i++)
                        BITSET_SET(c->spillable, i);
        }

        return reg;
}

struct qinst *
vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.add.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.mul.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_branch_inst(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
        inst->qpu.branch.cond = cond;
        inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
        inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
        inst->qpu.branch.ub = true;
        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;

        inst->dst = vir_nop_reg();
        inst->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, 0);

        return inst;
}

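/* Emits the instruction at the current cursor position, then moves the
 * cursor to just after it.
 */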
static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
        switch (c->cursor.mode) {
        case vir_cursor_add:
                list_add(&inst->link, c->cursor.link);
                break;
        case vir_cursor_addtail:
                list_addtail(&inst->link, c->cursor.link);
                break;
        }

        c->cursor = vir_after_inst(inst);
        c->live_intervals_valid = false;
}

/* Updates inst to write to a new temporary, emits it, and notes the def. */
struct qreg
vir_emit_def(struct v3d_compile *c, struct qinst *inst)
{
        assert(inst->dst.file == QFILE_NULL);

        /* If we're emitting an instruction that's a def, it had better be
         * writing a register.
         */
        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
                       v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
                assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
                       v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
        }

        inst->dst = vir_get_temp(c);

        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = inst;

        vir_emit(c, inst);

        return inst->dst;
}

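/* Like vir_emit_def(), but for instructions that write an existing
 * register (or no register at all) rather than defining a new temporary.
 */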
struct qinst *
vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
{
        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = NULL;

        vir_emit(c, inst);

        return inst;
}

struct qblock *
vir_new_block(struct v3d_compile *c)
{
        struct qblock *block = rzalloc(c, struct qblock);

        list_inithead(&block->instructions);

        block->predecessors = _mesa_set_create(block,
                                               _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);

        block->index = c->next_block_index++;

        return block;
}

void
vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
{
        c->cur_block = block;
        c->cursor = vir_after_block(block);
        list_addtail(&block->link, &c->blocks);
}

struct qblock *
vir_entry_block(struct v3d_compile *c)
{
        return list_first_entry(&c->blocks, struct qblock, link);
}

struct qblock *
vir_exit_block(struct v3d_compile *c)
{
        return list_last_entry(&c->blocks, struct qblock, link);
}

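/**
 * Adds a control flow edge from predecessor to successor.  A block may
 * have at most two successors.
 */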
void
vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
{
        _mesa_set_add(successor->predecessors, predecessor);
        if (predecessor->successors[0]) {
                assert(!predecessor->successors[1]);
                predecessor->successors[1] = successor;
        } else {
                predecessor->successors[0] = successor;
        }
}

const struct v3d_compiler *
v3d_compiler_init(const struct v3d_device_info *devinfo)
{
        struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
        if (!compiler)
                return NULL;

        compiler->devinfo = devinfo;

        if (!vir_init_reg_sets(compiler)) {
                ralloc_free(compiler);
                return NULL;
        }

        return compiler;
}

void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        ralloc_free((void *)compiler);
}

static struct v3d_compile *
vir_compile_init(const struct v3d_compiler *compiler,
                 struct v3d_key *key,
                 nir_shader *s,
                 void (*debug_output)(const char *msg,
                                      void *debug_output_data),
                 void *debug_output_data,
                 int program_id, int variant_id,
                 bool fallback_scheduler)
{
        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);

        c->compiler = compiler;
        c->devinfo = compiler->devinfo;
        c->key = key;
        c->program_id = program_id;
        c->variant_id = variant_id;
        c->threads = 4;
        c->debug_output = debug_output;
        c->debug_output_data = debug_output_data;
        c->compilation_result = V3D_COMPILATION_SUCCEEDED;
        c->fallback_scheduler = fallback_scheduler;

        s = nir_shader_clone(c, s);
        c->s = s;

        list_inithead(&c->blocks);
        vir_set_emit_block(c, vir_new_block(c));

        c->output_position_index = -1;
        c->output_sample_mask_index = -1;

        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        return c;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

static void
v3d_lower_nir(struct v3d_compile *c)
{
        struct nir_lower_tex_options tex_options = {
                .lower_txd = true,
                .lower_tg4_broadcom_swizzle = true,

                .lower_rect = false, /* XXX: Use this on V3D 3.x */
                .lower_txp = ~0,
                /* Apply swizzles to all samplers. */
                .swizzle_result = ~0,
        };

        /* Lower the format swizzle and (for 32-bit returns)
         * ARB_texture_swizzle-style swizzle.
         */
        for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
                for (int j = 0; j < 4; j++)
                        tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];

                if (c->key->tex[i].clamp_s)
                        tex_options.saturate_s |= 1 << i;
                if (c->key->tex[i].clamp_t)
                        tex_options.saturate_t |= 1 << i;
                if (c->key->tex[i].clamp_r)
                        tex_options.saturate_r |= 1 << i;
                if (c->key->tex[i].return_size == 16) {
                        tex_options.lower_tex_packing[i] =
                                nir_lower_tex_packing_16;
                }
        }

        /* CS textures may not have return_size reflecting the shadow state. */
        nir_foreach_uniform_variable(var, c->s) {
                const struct glsl_type *type = glsl_without_array(var->type);
                unsigned array_len = MAX2(glsl_get_length(var->type), 1);

                if (!glsl_type_is_sampler(type) ||
                    !glsl_sampler_type_is_shadow(type))
                        continue;

                for (int i = 0; i < array_len; i++) {
                        tex_options.lower_tex_packing[var->data.binding + i] =
                                nir_lower_tex_packing_16;
                }
        }

        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
        NIR_PASS_V(c->s, nir_lower_system_values);
        NIR_PASS_V(c->s, nir_lower_compute_system_values, NULL);

        NIR_PASS_V(c->s, nir_lower_vars_to_scratch,
                   nir_var_function_temp,
                   0,
                   glsl_get_natural_size_align_bytes);
        NIR_PASS_V(c->s, v3d_nir_lower_scratch);
}

static void
v3d_set_prog_data_uniforms(struct v3d_compile *c,
                           struct v3d_prog_data *prog_data)
{
        int count = c->num_uniforms;
        struct v3d_uniform_list *ulist = &prog_data->uniforms;

        ulist->count = count;
        ulist->data = ralloc_array(prog_data, uint32_t, count);
        memcpy(ulist->data, c->uniform_data,
               count * sizeof(*ulist->data));
        ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
        memcpy(ulist->contents, c->uniform_contents,
               count * sizeof(*ulist->contents));
}

static void
v3d_vs_set_prog_data(struct v3d_compile *c,
                     struct v3d_vs_prog_data *prog_data)
{
        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column.  Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        prog_data->uses_vid = BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_VERTEX_ID) ||
                              BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);

        prog_data->uses_biid = BITSET_TEST(c->s->info.system_values_read,
                                           SYSTEM_VALUE_BASE_INSTANCE);

        prog_data->uses_iid = BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_INSTANCE_ID) ||
                              BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_INSTANCE_INDEX);

        if (prog_data->uses_vid)
                prog_data->vpm_input_size++;
        if (prog_data->uses_biid)
                prog_data->vpm_input_size++;
        if (prog_data->uses_iid)
                prog_data->vpm_input_size++;

        /* Input/output segment sizes are in sectors (8 rows of 32 bits per
         * channel).
         */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;

        /* Set us up for shared input/output segments.  This is apparently
         * necessary for our VCM setup to avoid varying corruption.
         */
        prog_data->separate_segments = false;
        prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
                                          prog_data->vpm_input_size);
        prog_data->vpm_input_size = 0;

        /* Compute VCM cache size.  We set up our program to take up less than
         * half of the VPM, so that any set of bin and render programs won't
         * run out of space.  We need space for at least one input segment,
         * and then allocate the rest to output segments (one for the current
         * program, the rest to VCM).  The valid range of the VCM cache size
         * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
         * batches.
         */
        assert(c->devinfo->vpm_size);
        int sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8;
        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
        int half_vpm = vpm_size_in_sectors / 2;
        int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
        int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
        assert(vpm_output_batches >= 2);
        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
}

static void
v3d_gs_set_prog_data(struct v3d_compile *c,
                     struct v3d_gs_prog_data *prog_data)
{
        prog_data->num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        /* gl_PrimitiveIdIn is written by the GBG into the first word of the
         * VPM output header automatically and the shader will overwrite
         * it after reading it if necessary, so it doesn't add to the VPM
         * size requirements.
         */
        prog_data->uses_pid = BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_PRIMITIVE_ID);

        /* Output segment size is in sectors (8 rows of 32 bits per channel) */
        prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;

        /* Compute SIMD dispatch width and update VPM output size accordingly
         * to ensure we can fit our program in memory. Available widths are
         * 16, 8, 4, 1.
         *
         * Notice that at draw time we will have to consider VPM memory
         * requirements from other stages and choose a smaller dispatch
         * width if needed to fit the program in VPM memory.
         */
        prog_data->simd_width = 16;
        while ((prog_data->simd_width > 1 && prog_data->vpm_output_size > 16) ||
               prog_data->simd_width == 2) {
                prog_data->simd_width >>= 1;
                prog_data->vpm_output_size =
                        align(prog_data->vpm_output_size, 2) / 2;
        }
        assert(prog_data->vpm_output_size <= 16);
        assert(prog_data->simd_width != 2);

        prog_data->out_prim_type = c->s->info.gs.output_primitive;
        prog_data->num_invocations = c->s->info.gs.invocations;
}

static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
{
        prog_data->num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
                      (V3D_MAX_FS_INPUTS - 1) / 24);
        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
                if (BITSET_TEST(c->flat_shade_flags, i))
                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->noperspective_flags, i))
                        prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->centroid_flags, i))
                        prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
        }
}

static void
v3d_fs_set_prog_data(struct v3d_compile *c,
                     struct v3d_fs_prog_data *prog_data)
{
        v3d_set_fs_prog_data_inputs(c, prog_data);
        prog_data->writes_z = c->writes_z;
        prog_data->disable_ez = !c->s->info.fs.early_fragment_tests;
        prog_data->uses_center_w = c->uses_center_w;
        prog_data->uses_implicit_point_line_varyings =
                c->uses_implicit_point_line_varyings;
        prog_data->lock_scoreboard_on_first_thrsw =
                c->lock_scoreboard_on_first_thrsw;
        prog_data->force_per_sample_msaa = c->force_per_sample_msaa;
}

static void
v3d_cs_set_prog_data(struct v3d_compile *c,
                     struct v3d_compute_prog_data *prog_data)
{
        prog_data->shared_size = c->s->info.cs.shared_size;
}

static void
v3d_set_prog_data(struct v3d_compile *c,
                  struct v3d_prog_data *prog_data)
{
        prog_data->threads = c->threads;
        prog_data->single_seg = !c->last_thrsw;
        prog_data->spill_size = c->spill_size;
        prog_data->tmu_dirty_rcl = c->tmu_dirty_rcl;

        v3d_set_prog_data_uniforms(c, prog_data);

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_gs_set_prog_data(c, (struct v3d_gs_prog_data *)prog_data);
                break;
        case MESA_SHADER_FRAGMENT:
                v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data);
                break;
        case MESA_SHADER_COMPUTE:
                v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
                break;
        default:
                unreachable("unsupported shader stage");
        }
}

static uint64_t *
v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
{
        *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);

        uint64_t *qpu_insts = malloc(*final_assembly_size);
        if (!qpu_insts)
                return NULL;

        memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);

        vir_compile_destroy(c);

        return qpu_insts;
}

static void
v3d_nir_lower_vs_early(struct v3d_compile *c)
{
        /* Split our I/O vars and dead code eliminate the unused
         * components.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
                   nir_var_shader_in | nir_var_shader_out);
        uint64_t used_outputs[4] = {0};
        for (int i = 0; i < c->vs_key->num_used_outputs; i++) {
                int slot = v3d_slot_get_slot(c->vs_key->used_outputs[i]);
                int comp = v3d_slot_get_component(c->vs_key->used_outputs[i]);
                used_outputs[comp] |= 1ull << slot;
        }
        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                   nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);

        /* This must go before nir_lower_io */
        if (c->vs_key->per_vertex_point_size)
                NIR_PASS_V(c->s, nir_lower_point_size, 1.0f, 0.0f);

        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
                   type_size_vec4,
                   (nir_lower_io_options)0);
        /* clean up nir_lower_io's deref_var remains and do a constant folding
         * pass on the code it generated.
         */
        NIR_PASS_V(c->s, nir_opt_dce);
        NIR_PASS_V(c->s, nir_opt_constant_folding);
}

static void
v3d_nir_lower_gs_early(struct v3d_compile *c)
{
        /* Split our I/O vars and dead code eliminate the unused
         * components.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
                   nir_var_shader_in | nir_var_shader_out);
        uint64_t used_outputs[4] = {0};
        for (int i = 0; i < c->gs_key->num_used_outputs; i++) {
                int slot = v3d_slot_get_slot(c->gs_key->used_outputs[i]);
                int comp = v3d_slot_get_component(c->gs_key->used_outputs[i]);
                used_outputs[comp] |= 1ull << slot;
        }
        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                   nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);

        /* This must go before nir_lower_io */
        if (c->gs_key->per_vertex_point_size)
                NIR_PASS_V(c->s, nir_lower_point_size, 1.0f, 0.0f);

        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
                   type_size_vec4,
                   (nir_lower_io_options)0);
        /* clean up nir_lower_io's deref_var remains */
        NIR_PASS_V(c->s, nir_opt_dce);
}

static void
v3d_fixup_fs_output_types(struct v3d_compile *c)
{
        nir_foreach_shader_out_variable(var, c->s) {
                uint32_t mask = 0;

                switch (var->data.location) {
                case FRAG_RESULT_COLOR:
                        mask = ~0;
                        break;
                case FRAG_RESULT_DATA0:
                case FRAG_RESULT_DATA1:
                case FRAG_RESULT_DATA2:
                case FRAG_RESULT_DATA3:
                        mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
                        break;
                }

                if (c->fs_key->int_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_INT,
                                                 glsl_get_components(var->type));
                } else if (c->fs_key->uint_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_UINT,
                                                 glsl_get_components(var->type));
                }
        }
}

static void
v3d_nir_lower_fs_early(struct v3d_compile *c)
{
        if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
                v3d_fixup_fs_output_types(c);

        NIR_PASS_V(c->s, v3d_nir_lower_logic_ops, c);

        if (c->fs_key->line_smoothing) {
                v3d_nir_lower_line_smooth(c->s);
                NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
                /* The lowering pass can introduce new sysval reads */
                nir_shader_gather_info(c->s, nir_shader_get_entrypoint(c->s));
        }

        /* If the shader has no non-TLB side effects, we can promote it to
         * enabling early_fragment_tests even if the user didn't.
         */
        if (!(c->s->info.num_images ||
              c->s->info.num_ssbos)) {
                c->s->info.fs.early_fragment_tests = true;
        }
}

static void
v3d_nir_lower_gs_late(struct v3d_compile *c)
{
        if (c->key->ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_gs, c->key->ucp_enables,
                           false, NULL);
        }

        /* Note: GS output scalarizing must happen after nir_lower_clip_gs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
}

static void
v3d_nir_lower_vs_late(struct v3d_compile *c)
{
        if (c->vs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (c->key->ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables,
                           false, false, NULL);
                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                           nir_var_shader_out);
        }

        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
}

static void
v3d_nir_lower_fs_late(struct v3d_compile *c)
{
        if (c->fs_key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color, true);

        if (c->fs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        /* In OpenGL the fragment shader can't read gl_ClipDistance[], but
         * Vulkan allows it, in which case the SPIR-V compiler will declare
         * VARYING_SLOT_CLIP_DIST0 as a compact array variable.  Pass true as
         * the last parameter to always operate with a compact array in both
         * OpenGL and Vulkan so we don't have to care about the API we
         * are using.
         */
        if (c->key->ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables, true);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
}

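/**
 * Returns the maximum number of temporaries live at any single instruction,
 * used for the max-temps shader-db statistic.
 */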
static uint32_t
vir_get_max_temps(struct v3d_compile *c)
{
        int max_ip = 0;
        vir_for_each_inst_inorder(inst, c)
                max_ip++;

        uint32_t *pressure = rzalloc_array(NULL, uint32_t, max_ip);

        for (int t = 0; t < c->num_temps; t++) {
                for (int i = c->temp_start[t]; (i < c->temp_end[t] &&
                                                i < max_ip); i++) {
                        pressure[i]++;
                }
        }

        uint32_t max_temps = 0;
        for (int i = 0; i < max_ip; i++)
                max_temps = MAX2(max_temps, pressure[i]);

        ralloc_free(pressure);

        return max_temps;
}

enum v3d_dependency_class {
        V3D_DEPENDENCY_CLASS_GS_VPM_OUTPUT_0
};

static bool
v3d_intrinsic_dependency_cb(nir_intrinsic_instr *intr,
                            nir_schedule_dependency *dep,
                            void *user_data)
{
        struct v3d_compile *c = user_data;

        switch (intr->intrinsic) {
        case nir_intrinsic_store_output:
                /* Writing to location 0 overwrites the value passed in for
                 * gl_PrimitiveID on geometry shaders.
                 */
                if (c->s->info.stage != MESA_SHADER_GEOMETRY ||
                    nir_intrinsic_base(intr) != 0)
                        break;

                nir_const_value *const_value =
                        nir_src_as_const_value(intr->src[1]);

                if (const_value == NULL)
                        break;

                uint64_t offset =
                        nir_const_value_as_uint(*const_value,
                                                nir_src_bit_size(intr->src[1]));
                if (offset != 0)
                        break;

                dep->klass = V3D_DEPENDENCY_CLASS_GS_VPM_OUTPUT_0;
                dep->type = NIR_SCHEDULE_WRITE_DEPENDENCY;
                return true;

        case nir_intrinsic_load_primitive_id:
                if (c->s->info.stage != MESA_SHADER_GEOMETRY)
                        break;

                dep->klass = V3D_DEPENDENCY_CLASS_GS_VPM_OUTPUT_0;
                dep->type = NIR_SCHEDULE_READ_DEPENDENCY;
                return true;

        default:
                break;
        }

        return false;
}

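/* Runs the NIR lowering and optimization passes for the shader stage and
 * then generates VIR from the result.  On register allocation failure the
 * caller may retry with the fallback scheduler (see v3d_compile()).
 */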
static void
v3d_attempt_compile(struct v3d_compile *c)
{
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                c->vs_key = (struct v3d_vs_key *) c->key;
                break;
        case MESA_SHADER_GEOMETRY:
                c->gs_key = (struct v3d_gs_key *) c->key;
                break;
        case MESA_SHADER_FRAGMENT:
                c->fs_key = (struct v3d_fs_key *) c->key;
                break;
        case MESA_SHADER_COMPUTE:
                break;
        default:
                unreachable("unsupported shader stage");
        }

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_lower_vs_early(c);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_lower_gs_early(c);
                break;
        case MESA_SHADER_FRAGMENT:
                v3d_nir_lower_fs_early(c);
                break;
        default:
                break;
        }

        v3d_lower_nir(c);

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_lower_vs_late(c);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_lower_gs_late(c);
                break;
        case MESA_SHADER_FRAGMENT:
                v3d_nir_lower_fs_late(c);
                break;
        default:
                break;
        }

        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
        NIR_PASS_V(c->s, nir_lower_idiv, nir_lower_idiv_fast);

        if (c->key->robust_buffer_access) {
                /* v3d_nir_lower_robust_buffer_access assumes constant buffer
                 * indices on ubo/ssbo intrinsics, so run a copy propagation
                 * pass before the lowering to ensure this.  We also want to
                 * run the lowering before v3d_optimize to clean up redundant
                 * get_buffer_size calls produced in the pass.
                 */
                NIR_PASS_V(c->s, nir_copy_prop);
                NIR_PASS_V(c->s, v3d_nir_lower_robust_buffer_access, c);
        }

        v3d_optimize_nir(c->s);

        /* Do late algebraic optimization to turn add(a, neg(b)) back into
         * subs, then the mandatory cleanup after algebraic.  Note that it may
         * produce fnegs, and if so then we need to keep running to squash
         * fneg(fneg(a)).
         */
        bool more_late_algebraic = true;
        while (more_late_algebraic) {
                more_late_algebraic = false;
                NIR_PASS(more_late_algebraic, c->s, nir_opt_algebraic_late);
                NIR_PASS_V(c->s, nir_opt_constant_folding);
                NIR_PASS_V(c->s, nir_copy_prop);
                NIR_PASS_V(c->s, nir_opt_dce);
                NIR_PASS_V(c->s, nir_opt_cse);
        }

        NIR_PASS_V(c->s, nir_lower_bool_to_int32);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        struct nir_schedule_options schedule_options = {
                /* Schedule for about half our register space, to enable more
                 * shaders to hit 4 threads.
                 */
                .threshold = 24,

                /* Vertex shaders share the same memory for inputs and
                 * outputs, fragment and geometry shaders do not.
                 */
                .stages_with_shared_io_memory =
                (((1 << MESA_ALL_SHADER_STAGES) - 1) &
                 ~((1 << MESA_SHADER_FRAGMENT) |
                   (1 << MESA_SHADER_GEOMETRY))),

                .fallback = c->fallback_scheduler,

                .intrinsic_cb = v3d_intrinsic_dependency_cb,
                .intrinsic_cb_data = c,
        };
        NIR_PASS_V(c->s, nir_schedule, &schedule_options);

        v3d_nir_to_vir(c);
}

uint32_t
v3d_prog_data_size(gl_shader_stage stage)
{
        static const int prog_data_size[] = {
                [MESA_SHADER_VERTEX] = sizeof(struct v3d_vs_prog_data),
                [MESA_SHADER_GEOMETRY] = sizeof(struct v3d_gs_prog_data),
                [MESA_SHADER_FRAGMENT] = sizeof(struct v3d_fs_prog_data),
                [MESA_SHADER_COMPUTE] = sizeof(struct v3d_compute_prog_data),
        };

        assert(stage >= 0 &&
               stage < ARRAY_SIZE(prog_data_size) &&
               prog_data_size[stage]);

        return prog_data_size[stage];
}

int v3d_shaderdb_dump(struct v3d_compile *c,
                      char **shaderdb_str)
{
        if (c == NULL)
                return -1;

        return asprintf(shaderdb_str,
                        "%s shader: %d inst, %d threads, %d loops, "
                        "%d uniforms, %d max-temps, %d:%d spills:fills, "
                        "%d sfu-stalls, %d inst-and-stalls",
                        vir_get_stage_name(c),
                        c->qpu_inst_count,
                        c->threads,
                        c->loops,
                        c->num_uniforms,
                        vir_get_max_temps(c),
                        c->spills,
                        c->fills,
                        c->qpu_inst_stalled_count,
                        c->qpu_inst_count + c->qpu_inst_stalled_count);
}

uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                      struct v3d_key *key,
                      struct v3d_prog_data **out_prog_data,
                      nir_shader *s,
                      void (*debug_output)(const char *msg,
                                           void *debug_output_data),
                      void *debug_output_data,
                      int program_id, int variant_id,
                      uint32_t *final_assembly_size)
{
        struct v3d_compile *c;

        for (int i = 0; true; i++) {
                c = vir_compile_init(compiler, key, s,
                                     debug_output, debug_output_data,
                                     program_id, variant_id,
                                     i > 0 /* fallback_scheduler */);

                v3d_attempt_compile(c);

                if (i > 0 ||
                    c->compilation_result !=
                    V3D_COMPILATION_FAILED_REGISTER_ALLOCATION)
                        break;

                char *debug_msg;
                int ret = asprintf(&debug_msg,
                                   "Using fallback scheduler for %s",
                                   vir_get_stage_name(c));

                if (ret >= 0) {
                        if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))
                                fprintf(stderr, "%s\n", debug_msg);

                        c->debug_output(debug_msg, c->debug_output_data);
                        free(debug_msg);
                }

                vir_compile_destroy(c);
        }

        struct v3d_prog_data *prog_data;

        prog_data = rzalloc_size(NULL, v3d_prog_data_size(c->s->info.stage));

        v3d_set_prog_data(c, prog_data);

        *out_prog_data = prog_data;

        char *shaderdb;
        int ret = v3d_shaderdb_dump(c, &shaderdb);
        if (ret >= 0) {
                if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
                        fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

                c->debug_output(shaderdb, c->debug_output_data);
                free(shaderdb);
        }

        return v3d_return_qpu_insts(c, final_assembly_size);
}

void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
        if (qinst->dst.file == QFILE_TEMP)
                c->defs[qinst->dst.index] = NULL;

        assert(&qinst->link != c->cursor.link);

        list_del(&qinst->link);
        free(qinst);

        c->live_intervals_valid = false;
}

struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}

void
vir_compile_destroy(struct v3d_compile *c)
{
        /* Defuse the assert that we aren't removing the cursor's instruction.
         */
        c->cursor.link = NULL;

        vir_for_each_block(block, c) {
                while (!list_is_empty(&block->instructions)) {
                        struct qinst *qinst =
                                list_first_entry(&block->instructions,
                                                 struct qinst, link);
                        vir_remove_instruction(c, qinst);
                }
        }

        ralloc_free(c);
}

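/**
 * Returns the index of the given uniform in the shader's uniform list,
 * appending a new entry (and growing the arrays) if it isn't there yet.
 */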
uint32_t
vir_get_uniform_index(struct v3d_compile *c,
                      enum quniform_contents contents,
                      uint32_t data)
{
        for (int i = 0; i < c->num_uniforms; i++) {
                if (c->uniform_contents[i] == contents &&
                    c->uniform_data[i] == data) {
                        return i;
                }
        }

        uint32_t uniform = c->num_uniforms++;

        if (uniform >= c->uniform_array_size) {
                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
                                             c->uniform_array_size * 2);

                c->uniform_data = reralloc(c, c->uniform_data,
                                           uint32_t,
                                           c->uniform_array_size);
                c->uniform_contents = reralloc(c, c->uniform_contents,
                                               enum quniform_contents,
                                               c->uniform_array_size);
        }

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return uniform;
}

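/**
 * Emits a NOP with the ldunif signal set, returning a fresh temporary
 * loaded with the given uniform value.
 */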
struct qreg
vir_uniform(struct v3d_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        struct qinst *inst = vir_NOP(c);
        inst->qpu.sig.ldunif = true;
        inst->uniform = vir_get_uniform_index(c, contents, data);
        inst->dst = vir_get_temp(c);
        c->defs[inst->dst.index] = inst;
        return inst->dst;
}

#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)

void
vir_optimize(struct v3d_compile *c)
{
        bool print_opt_debug = false;
        int pass = 1;

        while (true) {
                bool progress = false;

                OPTPASS(vir_opt_copy_propagate);
                OPTPASS(vir_opt_redundant_flags);
                OPTPASS(vir_opt_dead_code);
                OPTPASS(vir_opt_small_immediates);

                if (!progress)
                        break;

                pass++;
        }
}

const char *
vir_get_stage_name(struct v3d_compile *c)
{
        if (c->vs_key && c->vs_key->is_coord)
                return "MESA_SHADER_VERTEX_BIN";
        else if (c->gs_key && c->gs_key->is_coord)
                return "MESA_SHADER_GEOMETRY_BIN";
        else
                return gl_shader_stage_name(c->s->info.stage);
}