1 /*
2  * Copyright © 2016-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "broadcom/common/v3d_device_info.h"
25 #include "v3d_compiler.h"
26 
27 int
vir_get_non_sideband_nsrc(struct qinst * inst)28 vir_get_non_sideband_nsrc(struct qinst *inst)
29 {
30         switch (inst->qpu.type) {
31         case V3D_QPU_INSTR_TYPE_BRANCH:
32                 return 0;
33         case V3D_QPU_INSTR_TYPE_ALU:
34                 if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
35                         return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
36                 else
37                         return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
38         }
39 
40         return 0;
41 }
42 
/**
 * Returns the total number of sources of the instruction: the explicit
 * operands plus one extra slot when the instruction carries an implicit
 * uniform.
 */
int
vir_get_nsrc(struct qinst *inst)
{
        const int explicit_srcs = vir_get_non_sideband_nsrc(inst);

        return vir_has_implicit_uniform(inst) ? explicit_srcs + 1
                                              : explicit_srcs;
}
53 
54 bool
vir_has_implicit_uniform(struct qinst * inst)55 vir_has_implicit_uniform(struct qinst *inst)
56 {
57         switch (inst->qpu.type) {
58         case V3D_QPU_INSTR_TYPE_BRANCH:
59                 return true;
60         case V3D_QPU_INSTR_TYPE_ALU:
61                 switch (inst->dst.file) {
62                 case QFILE_TLBU:
63                         return true;
64                 default:
65                         return inst->has_implicit_uniform;
66                 }
67         }
68         return false;
69 }
70 
/* The sideband uniform for textures is stored after the normal ALU
 * arguments, so its index is the last source slot of the instruction.
 */
int
vir_get_implicit_uniform_src(struct qinst *inst)
{
        const int nsrc = vir_get_nsrc(inst);

        return nsrc - 1;
}
79 
80 /**
81  * Returns whether the instruction has any side effects that must be
82  * preserved.
83  */
84 bool
vir_has_side_effects(struct v3d_compile * c,struct qinst * inst)85 vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
86 {
87         switch (inst->qpu.type) {
88         case V3D_QPU_INSTR_TYPE_BRANCH:
89                 return true;
90         case V3D_QPU_INSTR_TYPE_ALU:
91                 switch (inst->qpu.alu.add.op) {
92                 case V3D_QPU_A_SETREVF:
93                 case V3D_QPU_A_SETMSF:
94                 case V3D_QPU_A_VPMSETUP:
95                 case V3D_QPU_A_STVPMV:
96                 case V3D_QPU_A_STVPMD:
97                 case V3D_QPU_A_STVPMP:
98                 case V3D_QPU_A_VPMWT:
99                         return true;
100                 default:
101                         break;
102                 }
103 
104                 switch (inst->qpu.alu.mul.op) {
105                 case V3D_QPU_M_MULTOP:
106                         return true;
107                 default:
108                         break;
109                 }
110         }
111 
112         if (inst->qpu.sig.ldtmu ||
113             inst->qpu.sig.ldvary ||
114             inst->qpu.sig.wrtmuc ||
115             inst->qpu.sig.thrsw) {
116                 return true;
117         }
118 
119         return false;
120 }
121 
122 bool
vir_is_float_input(struct qinst * inst)123 vir_is_float_input(struct qinst *inst)
124 {
125         /* XXX: More instrs */
126         switch (inst->qpu.type) {
127         case V3D_QPU_INSTR_TYPE_BRANCH:
128                 return false;
129         case V3D_QPU_INSTR_TYPE_ALU:
130                 switch (inst->qpu.alu.add.op) {
131                 case V3D_QPU_A_FADD:
132                 case V3D_QPU_A_FSUB:
133                 case V3D_QPU_A_FMIN:
134                 case V3D_QPU_A_FMAX:
135                 case V3D_QPU_A_FTOIN:
136                         return true;
137                 default:
138                         break;
139                 }
140 
141                 switch (inst->qpu.alu.mul.op) {
142                 case V3D_QPU_M_FMOV:
143                 case V3D_QPU_M_VFMUL:
144                 case V3D_QPU_M_FMUL:
145                         return true;
146                 default:
147                         break;
148                 }
149         }
150 
151         return false;
152 }
153 
154 bool
vir_is_raw_mov(struct qinst * inst)155 vir_is_raw_mov(struct qinst *inst)
156 {
157         if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
158             (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
159              inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
160                 return false;
161         }
162 
163         if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
164             inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
165                 return false;
166         }
167 
168         if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
169             inst->qpu.flags.mc != V3D_QPU_COND_NONE)
170                 return false;
171 
172         return true;
173 }
174 
175 bool
vir_is_add(struct qinst * inst)176 vir_is_add(struct qinst *inst)
177 {
178         return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
179                 inst->qpu.alu.add.op != V3D_QPU_A_NOP);
180 }
181 
182 bool
vir_is_mul(struct qinst * inst)183 vir_is_mul(struct qinst *inst)
184 {
185         return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
186                 inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
187 }
188 
189 bool
vir_is_tex(struct qinst * inst)190 vir_is_tex(struct qinst *inst)
191 {
192         if (inst->dst.file == QFILE_MAGIC)
193                 return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
194 
195         return false;
196 }
197 
198 bool
vir_depends_on_flags(struct qinst * inst)199 vir_depends_on_flags(struct qinst *inst)
200 {
201         if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
202                 return (inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS);
203         } else {
204                 return (inst->qpu.flags.ac != V3D_QPU_COND_NONE &&
205                         inst->qpu.flags.mc != V3D_QPU_COND_NONE);
206         }
207 }
208 
209 bool
vir_writes_r3(const struct v3d_device_info * devinfo,struct qinst * inst)210 vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
211 {
212         for (int i = 0; i < vir_get_nsrc(inst); i++) {
213                 switch (inst->src[i].file) {
214                 case QFILE_VPM:
215                         return true;
216                 default:
217                         break;
218                 }
219         }
220 
221         if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
222                                   inst->qpu.sig.ldtlb ||
223                                   inst->qpu.sig.ldtlbu ||
224                                   inst->qpu.sig.ldvpm)) {
225                 return true;
226         }
227 
228         return false;
229 }
230 
231 bool
vir_writes_r4(const struct v3d_device_info * devinfo,struct qinst * inst)232 vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
233 {
234         switch (inst->dst.file) {
235         case QFILE_MAGIC:
236                 switch (inst->dst.index) {
237                 case V3D_QPU_WADDR_RECIP:
238                 case V3D_QPU_WADDR_RSQRT:
239                 case V3D_QPU_WADDR_EXP:
240                 case V3D_QPU_WADDR_LOG:
241                 case V3D_QPU_WADDR_SIN:
242                         return true;
243                 }
244                 break;
245         default:
246                 break;
247         }
248 
249         if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
250                 return true;
251 
252         return false;
253 }
254 
255 void
vir_set_unpack(struct qinst * inst,int src,enum v3d_qpu_input_unpack unpack)256 vir_set_unpack(struct qinst *inst, int src,
257                enum v3d_qpu_input_unpack unpack)
258 {
259         assert(src == 0 || src == 1);
260 
261         if (vir_is_add(inst)) {
262                 if (src == 0)
263                         inst->qpu.alu.add.a_unpack = unpack;
264                 else
265                         inst->qpu.alu.add.b_unpack = unpack;
266         } else {
267                 assert(vir_is_mul(inst));
268                 if (src == 0)
269                         inst->qpu.alu.mul.a_unpack = unpack;
270                 else
271                         inst->qpu.alu.mul.b_unpack = unpack;
272         }
273 }
274 
275 void
vir_set_cond(struct qinst * inst,enum v3d_qpu_cond cond)276 vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
277 {
278         if (vir_is_add(inst)) {
279                 inst->qpu.flags.ac = cond;
280         } else {
281                 assert(vir_is_mul(inst));
282                 inst->qpu.flags.mc = cond;
283         }
284 }
285 
286 void
vir_set_pf(struct qinst * inst,enum v3d_qpu_pf pf)287 vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
288 {
289         if (vir_is_add(inst)) {
290                 inst->qpu.flags.apf = pf;
291         } else {
292                 assert(vir_is_mul(inst));
293                 inst->qpu.flags.mpf = pf;
294         }
295 }
296 
#if 0
/* Disabled code: returns a 4-bit mask of the destination channels written,
 * derived from the destination pack mode.
 *
 * NOTE(review): this references QPU_PACK_* names and a dst.pack field that
 * are not used anywhere else in this file -- presumably it has not been
 * ported yet; confirm before re-enabling.
 */
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
                unreachable("Bad pack field");
        }

        switch (inst->dst.pack) {
        case QPU_PACK_A_NOP:
        case QPU_PACK_A_8888:
        case QPU_PACK_A_8888_SAT:
        case QPU_PACK_A_32_SAT:
                return 0xf;
        case QPU_PACK_A_8A:
        case QPU_PACK_A_8A_SAT:
                return 0x1;
        case QPU_PACK_A_8B:
        case QPU_PACK_A_8B_SAT:
                return 0x2;
        case QPU_PACK_A_8C:
        case QPU_PACK_A_8C_SAT:
                return 0x4;
        case QPU_PACK_A_8D:
        case QPU_PACK_A_8D_SAT:
                return 0x8;
        case QPU_PACK_A_16A:
        case QPU_PACK_A_16A_SAT:
                return 0x3;
        case QPU_PACK_A_16B:
        case QPU_PACK_A_16B_SAT:
                return 0xc;
        }
        unreachable("Bad pack field");
}
#endif
345 
346 struct qreg
vir_get_temp(struct v3d_compile * c)347 vir_get_temp(struct v3d_compile *c)
348 {
349         struct qreg reg;
350 
351         reg.file = QFILE_TEMP;
352         reg.index = c->num_temps++;
353 
354         if (c->num_temps > c->defs_array_size) {
355                 uint32_t old_size = c->defs_array_size;
356                 c->defs_array_size = MAX2(old_size * 2, 16);
357                 c->defs = reralloc(c, c->defs, struct qinst *,
358                                    c->defs_array_size);
359                 memset(&c->defs[old_size], 0,
360                        sizeof(c->defs[0]) * (c->defs_array_size - old_size));
361         }
362 
363         return reg;
364 }
365 
366 struct qinst *
vir_add_inst(enum v3d_qpu_add_op op,struct qreg dst,struct qreg src0,struct qreg src1)367 vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
368 {
369         struct qinst *inst = calloc(1, sizeof(*inst));
370 
371         inst->qpu = v3d_qpu_nop();
372         inst->qpu.alu.add.op = op;
373 
374         inst->dst = dst;
375         inst->src[0] = src0;
376         inst->src[1] = src1;
377         inst->uniform = ~0;
378 
379         return inst;
380 }
381 
382 struct qinst *
vir_mul_inst(enum v3d_qpu_mul_op op,struct qreg dst,struct qreg src0,struct qreg src1)383 vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
384 {
385         struct qinst *inst = calloc(1, sizeof(*inst));
386 
387         inst->qpu = v3d_qpu_nop();
388         inst->qpu.alu.mul.op = op;
389 
390         inst->dst = dst;
391         inst->src[0] = src0;
392         inst->src[1] = src1;
393         inst->uniform = ~0;
394 
395         return inst;
396 }
397 
398 struct qinst *
vir_branch_inst(enum v3d_qpu_branch_cond cond,struct qreg src)399 vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
400 {
401         struct qinst *inst = calloc(1, sizeof(*inst));
402 
403         inst->qpu = v3d_qpu_nop();
404         inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
405         inst->qpu.branch.cond = cond;
406         inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
407         inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
408         inst->qpu.branch.ub = true;
409         inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;
410 
411         inst->dst = vir_reg(QFILE_NULL, 0);
412         inst->src[0] = src;
413         inst->uniform = ~0;
414 
415         return inst;
416 }
417 
418 static void
vir_emit(struct v3d_compile * c,struct qinst * inst)419 vir_emit(struct v3d_compile *c, struct qinst *inst)
420 {
421         list_addtail(&inst->link, &c->cur_block->instructions);
422 }
423 
424 /* Updates inst to write to a new temporary, emits it, and notes the def. */
425 struct qreg
vir_emit_def(struct v3d_compile * c,struct qinst * inst)426 vir_emit_def(struct v3d_compile *c, struct qinst *inst)
427 {
428         assert(inst->dst.file == QFILE_NULL);
429 
430         inst->dst = vir_get_temp(c);
431 
432         if (inst->dst.file == QFILE_TEMP)
433                 c->defs[inst->dst.index] = inst;
434 
435         vir_emit(c, inst);
436 
437         return inst->dst;
438 }
439 
440 struct qinst *
vir_emit_nondef(struct v3d_compile * c,struct qinst * inst)441 vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
442 {
443         if (inst->dst.file == QFILE_TEMP)
444                 c->defs[inst->dst.index] = NULL;
445 
446         vir_emit(c, inst);
447 
448         return inst;
449 }
450 
451 struct qblock *
vir_new_block(struct v3d_compile * c)452 vir_new_block(struct v3d_compile *c)
453 {
454         struct qblock *block = rzalloc(c, struct qblock);
455 
456         list_inithead(&block->instructions);
457 
458         block->predecessors = _mesa_set_create(block,
459                                                _mesa_hash_pointer,
460                                                _mesa_key_pointer_equal);
461 
462         block->index = c->next_block_index++;
463 
464         return block;
465 }
466 
467 void
vir_set_emit_block(struct v3d_compile * c,struct qblock * block)468 vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
469 {
470         c->cur_block = block;
471         list_addtail(&block->link, &c->blocks);
472 }
473 
474 struct qblock *
vir_entry_block(struct v3d_compile * c)475 vir_entry_block(struct v3d_compile *c)
476 {
477         return list_first_entry(&c->blocks, struct qblock, link);
478 }
479 
480 struct qblock *
vir_exit_block(struct v3d_compile * c)481 vir_exit_block(struct v3d_compile *c)
482 {
483         return list_last_entry(&c->blocks, struct qblock, link);
484 }
485 
486 void
vir_link_blocks(struct qblock * predecessor,struct qblock * successor)487 vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
488 {
489         _mesa_set_add(successor->predecessors, predecessor);
490         if (predecessor->successors[0]) {
491                 assert(!predecessor->successors[1]);
492                 predecessor->successors[1] = successor;
493         } else {
494                 predecessor->successors[0] = successor;
495         }
496 }
497 
498 const struct v3d_compiler *
v3d_compiler_init(const struct v3d_device_info * devinfo)499 v3d_compiler_init(const struct v3d_device_info *devinfo)
500 {
501         struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
502         if (!compiler)
503                 return NULL;
504 
505         compiler->devinfo = devinfo;
506 
507         if (!vir_init_reg_sets(compiler)) {
508                 ralloc_free(compiler);
509                 return NULL;
510         }
511 
512         return compiler;
513 }
514 
/** Frees a compiler instance created by v3d_compiler_init(). */
void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        /* ralloc_free() takes a non-const pointer, so drop the qualifier. */
        void *mem = (void *)compiler;

        ralloc_free(mem);
}
520 
521 static struct v3d_compile *
vir_compile_init(const struct v3d_compiler * compiler,struct v3d_key * key,nir_shader * s,int program_id,int variant_id)522 vir_compile_init(const struct v3d_compiler *compiler,
523                  struct v3d_key *key,
524                  nir_shader *s,
525                  int program_id, int variant_id)
526 {
527         struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);
528 
529         c->compiler = compiler;
530         c->devinfo = compiler->devinfo;
531         c->key = key;
532         c->program_id = program_id;
533         c->variant_id = variant_id;
534         c->threads = 4;
535 
536         s = nir_shader_clone(c, s);
537         c->s = s;
538 
539         list_inithead(&c->blocks);
540         vir_set_emit_block(c, vir_new_block(c));
541 
542         c->output_position_index = -1;
543         c->output_point_size_index = -1;
544         c->output_sample_mask_index = -1;
545 
546         c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
547                                             _mesa_key_pointer_equal);
548 
549         return c;
550 }
551 
552 static void
v3d_lower_nir(struct v3d_compile * c)553 v3d_lower_nir(struct v3d_compile *c)
554 {
555         struct nir_lower_tex_options tex_options = {
556                 .lower_txd = true,
557                 .lower_rect = false, /* XXX */
558                 .lower_txp = ~0,
559                 /* Apply swizzles to all samplers. */
560                 .swizzle_result = ~0,
561         };
562 
563         /* Lower the format swizzle and (for 32-bit returns)
564          * ARB_texture_swizzle-style swizzle.
565          */
566         for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
567                 for (int j = 0; j < 4; j++)
568                         tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];
569 
570                 if (c->key->tex[i].clamp_s)
571                         tex_options.saturate_s |= 1 << i;
572                 if (c->key->tex[i].clamp_t)
573                         tex_options.saturate_t |= 1 << i;
574                 if (c->key->tex[i].clamp_r)
575                         tex_options.saturate_r |= 1 << i;
576         }
577 
578         NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
579 }
580 
581 static void
v3d_lower_nir_late(struct v3d_compile * c)582 v3d_lower_nir_late(struct v3d_compile *c)
583 {
584         NIR_PASS_V(c->s, v3d_nir_lower_io, c);
585         NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
586         NIR_PASS_V(c->s, nir_lower_idiv);
587 }
588 
589 static void
v3d_set_prog_data_uniforms(struct v3d_compile * c,struct v3d_prog_data * prog_data)590 v3d_set_prog_data_uniforms(struct v3d_compile *c,
591                            struct v3d_prog_data *prog_data)
592 {
593         int count = c->num_uniforms;
594         struct v3d_uniform_list *ulist = &prog_data->uniforms;
595 
596         ulist->count = count;
597         ulist->data = ralloc_array(prog_data, uint32_t, count);
598         memcpy(ulist->data, c->uniform_data,
599                count * sizeof(*ulist->data));
600         ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
601         memcpy(ulist->contents, c->uniform_contents,
602                count * sizeof(*ulist->contents));
603 }
604 
605 /* Copy the compiler UBO range state to the compiled shader, dropping out
606  * arrays that were never referenced by an indirect load.
607  *
608  * (Note that QIR dead code elimination of an array access still leaves that
609  * array alive, though)
610  */
611 static void
v3d_set_prog_data_ubo(struct v3d_compile * c,struct v3d_prog_data * prog_data)612 v3d_set_prog_data_ubo(struct v3d_compile *c,
613                       struct v3d_prog_data *prog_data)
614 {
615         if (!c->num_ubo_ranges)
616                 return;
617 
618         prog_data->num_ubo_ranges = 0;
619         prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
620                                              c->num_ubo_ranges);
621         for (int i = 0; i < c->num_ubo_ranges; i++) {
622                 if (!c->ubo_range_used[i])
623                         continue;
624 
625                 struct v3d_ubo_range *range = &c->ubo_ranges[i];
626                 prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
627                 prog_data->ubo_size += range->size;
628         }
629 
630         if (prog_data->ubo_size) {
631                 if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
632                         fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
633                                 vir_get_stage_name(c),
634                                 c->program_id, c->variant_id,
635                                 prog_data->ubo_size / 4);
636                 }
637         }
638 }
639 
640 static void
v3d_set_prog_data(struct v3d_compile * c,struct v3d_prog_data * prog_data)641 v3d_set_prog_data(struct v3d_compile *c,
642                   struct v3d_prog_data *prog_data)
643 {
644         prog_data->threads = c->threads;
645         prog_data->single_seg = !c->last_thrsw;
646 
647         v3d_set_prog_data_uniforms(c, prog_data);
648         v3d_set_prog_data_ubo(c, prog_data);
649 }
650 
651 static uint64_t *
v3d_return_qpu_insts(struct v3d_compile * c,uint32_t * final_assembly_size)652 v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
653 {
654         *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
655 
656         uint64_t *qpu_insts = malloc(*final_assembly_size);
657         if (!qpu_insts)
658                 return NULL;
659 
660         memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
661 
662         vir_compile_destroy(c);
663 
664         return qpu_insts;
665 }
666 
/**
 * Compiles a vertex shader variant.
 *
 * Runs the NIR lowering and optimization passes on a private clone of s,
 * translates it to VIR/QPU code, and fills in prog_data.  Returns the
 * QPU instruction buffer produced by v3d_return_qpu_insts() and stores
 * its size in bytes in *final_assembly_size.
 */
uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
                         struct v3d_vs_key *key,
                         struct v3d_vs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->vs_key = key;

        v3d_lower_nir(c);

        if (key->clamp_color) {
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
        }

        if (key->base.ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_vs, key->base.ucp_enables);
                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                           nir_var_shader_out);
        }

        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        /* Everything below reads results out of the compile context, which
         * is destroyed by v3d_return_qpu_insts() at the end.
         */
        v3d_set_prog_data(c, &prog_data->base);
        prog_data->base.num_inputs = c->num_inputs;

        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column.  Precompute
         * the sizes for the shader record.
         */
        for (int attr = 0; attr < ARRAY_SIZE(prog_data->vattr_sizes); attr++) {
                prog_data->vattr_sizes[attr] = c->vattr_sizes[attr];
                prog_data->vpm_input_size += c->vattr_sizes[attr];
        }

        /* Input/output segment size are in 8x32-bit multiples. */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;

        /* Note these are read from the caller's shader, not the clone. */
        prog_data->uses_vid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_VERTEX_ID));
        prog_data->uses_iid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_INSTANCE_ID));

        return v3d_return_qpu_insts(c, final_assembly_size);
}
723 
724 static void
v3d_set_fs_prog_data_inputs(struct v3d_compile * c,struct v3d_fs_prog_data * prog_data)725 v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
726                             struct v3d_fs_prog_data *prog_data)
727 {
728         prog_data->base.num_inputs = c->num_inputs;
729         memcpy(prog_data->input_slots, c->input_slots,
730                c->num_inputs * sizeof(*c->input_slots));
731 
732         STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
733                       (V3D_MAX_FS_INPUTS - 1) / 24);
734         for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
735                 if (BITSET_TEST(c->flat_shade_flags, i))
736                         prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);
737         }
738 }
739 
/**
 * Compiles a fragment shader variant.
 *
 * Runs the NIR lowering and optimization passes on a private clone of s,
 * translates it to VIR/QPU code, and fills in prog_data.  Returns the
 * QPU instruction buffer produced by v3d_return_qpu_insts() and stores
 * its size in bytes in *final_assembly_size.
 */
uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
                         struct v3d_fs_key *key,
                         struct v3d_fs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->fs_key = key;

        v3d_lower_nir(c);

        /* Key-dependent lowering. */
        if (key->light_twoside) {
                NIR_PASS_V(c->s, nir_lower_two_sided_color);
        }

        if (key->clamp_color) {
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
        }

        if (key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test, key->alpha_test_func,
                           false);
        }

        if (key->base.ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_fs, key->base.ucp_enables);
        }

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        /* Everything below reads results out of the compile context, which
         * is destroyed by v3d_return_qpu_insts() at the end.
         */
        v3d_set_prog_data(c, &prog_data->base);
        v3d_set_fs_prog_data_inputs(c, prog_data);

        prog_data->discard = c->s->info.fs.uses_discard;
        prog_data->writes_z = (c->s->info.outputs_written &
                               (1 << FRAG_RESULT_DEPTH));

        return v3d_return_qpu_insts(c, final_assembly_size);
}
787 
788 void
vir_remove_instruction(struct v3d_compile * c,struct qinst * qinst)789 vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
790 {
791         if (qinst->dst.file == QFILE_TEMP)
792                 c->defs[qinst->dst.index] = NULL;
793 
794         list_del(&qinst->link);
795         free(qinst);
796 }
797 
798 struct qreg
vir_follow_movs(struct v3d_compile * c,struct qreg reg)799 vir_follow_movs(struct v3d_compile *c, struct qreg reg)
800 {
801         /* XXX
802         int pack = reg.pack;
803 
804         while (reg.file == QFILE_TEMP &&
805                c->defs[reg.index] &&
806                (c->defs[reg.index]->op == QOP_MOV ||
807                 c->defs[reg.index]->op == QOP_FMOV) &&
808                !c->defs[reg.index]->dst.pack &&
809                !c->defs[reg.index]->src[0].pack) {
810                 reg = c->defs[reg.index]->src[0];
811         }
812 
813         reg.pack = pack;
814         */
815         return reg;
816 }
817 
818 void
vir_compile_destroy(struct v3d_compile * c)819 vir_compile_destroy(struct v3d_compile *c)
820 {
821         vir_for_each_block(block, c) {
822                 while (!list_empty(&block->instructions)) {
823                         struct qinst *qinst =
824                                 list_first_entry(&block->instructions,
825                                                  struct qinst, link);
826                         vir_remove_instruction(c, qinst);
827                 }
828         }
829 
830         ralloc_free(c);
831 }
832 
833 struct qreg
vir_uniform(struct v3d_compile * c,enum quniform_contents contents,uint32_t data)834 vir_uniform(struct v3d_compile *c,
835             enum quniform_contents contents,
836             uint32_t data)
837 {
838         for (int i = 0; i < c->num_uniforms; i++) {
839                 if (c->uniform_contents[i] == contents &&
840                     c->uniform_data[i] == data) {
841                         return vir_reg(QFILE_UNIF, i);
842                 }
843         }
844 
845         uint32_t uniform = c->num_uniforms++;
846 
847         if (uniform >= c->uniform_array_size) {
848                 c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
849                                              c->uniform_array_size * 2);
850 
851                 c->uniform_data = reralloc(c, c->uniform_data,
852                                            uint32_t,
853                                            c->uniform_array_size);
854                 c->uniform_contents = reralloc(c, c->uniform_contents,
855                                                enum quniform_contents,
856                                                c->uniform_array_size);
857         }
858 
859         c->uniform_contents[uniform] = contents;
860         c->uniform_data[uniform] = data;
861 
862         return vir_reg(QFILE_UNIF, uniform);
863 }
864 
865 void
vir_PF(struct v3d_compile * c,struct qreg src,enum v3d_qpu_pf pf)866 vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
867 {
868         struct qinst *last_inst = NULL;
869 
870         if (!list_empty(&c->cur_block->instructions))
871                 last_inst = (struct qinst *)c->cur_block->instructions.prev;
872 
873         if (src.file != QFILE_TEMP ||
874             !c->defs[src.index] ||
875             last_inst != c->defs[src.index]) {
876                 /* XXX: Make the MOV be the appropriate type */
877                 last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
878                 last_inst = (struct qinst *)c->cur_block->instructions.prev;
879         }
880 
881         vir_set_pf(last_inst, pf);
882 }
883 
/* Runs one optimization pass, func(c), from inside vir_optimize().
 *
 * Expects `c`, `progress`, `pass` and `print_opt_debug` to be in scope at
 * the point of use.  Sets `progress` when the pass reports that it changed
 * something, and logs the pass name when debug printing is enabled.
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                const bool pass_made_progress = func(c);                \
                if (!pass_made_progress)                                \
                        break;                                          \
                progress = true;                                        \
                if (print_opt_debug) {                                  \
                        fprintf(stderr,                                 \
                                "VIR opt pass %2d: %s progress\n",      \
                                pass, #func);                           \
                }                                                       \
                /*XXX vir_validate(c);*/                                \
        } while (0)
897 
898 void
vir_optimize(struct v3d_compile * c)899 vir_optimize(struct v3d_compile *c)
900 {
901         bool print_opt_debug = false;
902         int pass = 1;
903 
904         while (true) {
905                 bool progress = false;
906 
907                 OPTPASS(vir_opt_copy_propagate);
908                 OPTPASS(vir_opt_dead_code);
909 
910                 if (!progress)
911                         break;
912 
913                 pass++;
914         }
915 }
916 
917 const char *
vir_get_stage_name(struct v3d_compile * c)918 vir_get_stage_name(struct v3d_compile *c)
919 {
920         if (c->vs_key && c->vs_key->is_coord)
921                 return "MESA_SHADER_COORD";
922         else
923                 return gl_shader_stage_name(c->s->info.stage);
924 }
925