/*
 * Copyright © 2015 Intel Corporation
 * Copyright © 2014-2015 Broadcom
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/glsl/list.h"
#include "main/imports.h"
#include "util/ralloc.h"

#include "prog_to_nir.h"
#include "prog_instruction.h"
#include "prog_parameter.h"
#include "prog_print.h"
#include "program.h"

/**
 * \file prog_to_nir.c
 *
 * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
 * intended to support ARB_vertex_program, ARB_fragment_program, and
 * fixed-function vertex processing.  Full GLSL support should use glsl_to_nir
 * instead.
 */

struct ptn_compile {
   const struct gl_program *prog;
   nir_builder build;
   bool error;

   nir_variable *parameters;
   nir_variable *input_vars[VARYING_SLOT_MAX];
   nir_variable *output_vars[VARYING_SLOT_MAX];
   nir_register **output_regs;
   nir_register **temp_regs;

   nir_register *addr_reg;
};
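/* SWIZ() expands to a four-element swizzle array selecting the given
 * components; ptn_channel() extracts a single component of "src" as a
 * one-component SSA value.
 */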
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)

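/**
 * Returns an SSA value that reads back what was just written to "dest", so
 * that a result modifier (currently only saturate) can be applied after the
 * instruction has been emitted.
 */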
static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
{
   nir_builder *b = &c->build;

   nir_alu_src src;
   memset(&src, 0, sizeof(src));

   if (dest->dest.is_ssa)
      src.src = nir_src_for_ssa(&dest->dest.ssa);
   else {
      assert(!dest->dest.reg.indirect);
      src.src = nir_src_for_reg(dest->dest.reg.reg);
      src.src.reg.base_offset = dest->dest.reg.base_offset;
   }

   for (int i = 0; i < 4; i++)
      src.swizzle[i] = i;

   return nir_fmov_alu(b, src, 4);
}

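/**
 * Translate a Mesa IR destination register into a nir_alu_dest pointing at
 * the corresponding temporary, output, or address register.
 */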
static nir_alu_dest
ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
{
   nir_alu_dest dest;

   memset(&dest, 0, sizeof(dest));

   switch (prog_dst->File) {
   case PROGRAM_TEMPORARY:
      dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
      break;
   case PROGRAM_OUTPUT:
      dest.dest.reg.reg = c->output_regs[prog_dst->Index];
      break;
   case PROGRAM_ADDRESS:
      assert(prog_dst->Index == 0);
      dest.dest.reg.reg = c->addr_reg;
      break;
   case PROGRAM_UNDEFINED:
      break;
   }

   dest.write_mask = prog_dst->WriteMask;
   dest.saturate = false;

   assert(!prog_dst->RelAddr);

   return dest;
}

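/**
 * Translate a Mesa IR source register into a NIR SSA value, handling inputs,
 * temporaries, parameters/constants (with optional relative addressing), and
 * the per-component swizzle and negate modifiers.
 */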
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = 4;
      load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      src.src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         if ((c->prog->arb.IndirectRegisterFiles &
              (1 << PROGRAM_CONSTANT)) == 0) {
            float *v = (float *) plist->ParameterValues[prog_src->Index];
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         assert(c->parameters != NULL);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
         nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
         load->num_components = 4;

         load->variables[0] = nir_deref_var_create(load, c->parameters);
         nir_deref_array *deref_arr =
            nir_deref_array_create(load->variables[0]);
         deref_arr->deref.type = glsl_vec4_type();
         load->variables[0]->deref.child = &deref_arr->deref;

         if (prog_src->RelAddr) {
            deref_arr->deref_array_type = nir_deref_array_type_indirect;

            nir_alu_src addr_src = { NIR_SRC_INIT };
            addr_src.src = nir_src_for_reg(c->addr_reg);
            nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);

            if (prog_src->Index < 0) {
               /* This is a negative offset which should be added to the address
                * register's value.
                */
               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));

               deref_arr->base_offset = 0;
            } else {
               deref_arr->base_offset = prog_src->Index;
            }
            deref_arr->indirect = nir_src_for_ssa(reladdr);
         } else {
            deref_arr->deref_array_type = nir_deref_array_type_direct;
            deref_arr->base_offset = prog_src->Index;
         }

         nir_builder_instr_insert(b, &load->instr);

         src.src = nir_src_for_ssa(&load->dest.ssa);
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
      /* The simple non-SWZ case. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);

      if (prog_src->Negate)
         def = nir_fneg(b, def);
   } else {
      /* The SWZ instruction allows per-component zero/one swizzles, and also
       * per-component negation.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_builder_instr_insert(b, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }

         if (prog_src->Negate & (1 << i))
            chans[i] = nir_fneg(b, chans[i]);
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   return def;
}

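/**
 * Emit a single NIR ALU instruction whose sources have already been lowered
 * to SSA values, keeping the Mesa IR writemask carried in "dest".
 */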
static void
ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
   unsigned i;

   for (i = 0; i < num_srcs; i++)
      instr->src[i].src = nir_src_for_ssa(src[i]);

   instr->dest = dest;
   nir_builder_instr_insert(b, &instr->instr);
}

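/**
 * Move "def" into "dest", but only for the channels selected by both the
 * destination's writemask and "write_mask".
 */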
static void
ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
   if (!mov)
      return;

   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
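   /* Smear the last component of "def" across the unused swizzle slots so
    * that every channel enabled by the writemask reads a defined value.
    */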
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_builder_instr_insert(b, &mov->instr);
}

static void
ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
{
   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
}

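/* ARL - Address Register Load
 *  Floors the source and stores it as an integer in the address register,
 *  for use by later relative addressing.
 */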
static void
ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
}

/* EXP - Approximate Exponential Base 2
 *  dst.x = 2^{\lfloor src.x\rfloor}
 *  dst.y = src.x - \lfloor src.x\rfloor
 *  dst.z = 2^{src.x}
 *  dst.w = 1.0
 */
static void
ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *srcx = ptn_channel(b, src[0], X);

   ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/* LOG - Approximate Logarithm Base 2
 *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}
 *  dst.z = \log_2{|src.x|}
 *  dst.w = 1.0
 */
static void
ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
   nir_ssa_def *floor_log2 = nir_ffloor(b, log2);

   ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
   ptn_move_dest_masked(b, dest,
                        nir_fmul(b, abs_srcx,
                                 nir_fexp2(b, nir_fneg(b, floor_log2))),
                        WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/* DST - Distance Vector
 *   dst.x = 1.0
 *   dst.y = src0.y \times src1.y
 *   dst.z = src0.z
 *   dst.w = src1.w
 */
static void
ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
}

/* LIT - Light Coefficients
 *  dst.x = 1.0
 *  dst.y = max(src.x, 0.0)
 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0
 *  dst.w = 1.0
 */
static void
ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);

   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);

   if (dest.write_mask & WRITEMASK_Z) {
      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      nir_ssa_def *z;
      if (b->shader->options->native_integers) {
         z = nir_bcsel(b,
                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      } else {
         z = nir_fcsel(b,
                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      }

      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
   }
}

/* SCS - Sine Cosine
 *   dst.x = \cos{src.x}
 *   dst.y = \sin{src.x}
 *   dst.z = 0.0
 *   dst.w = 1.0
 */
static void
ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/**
 * Emit SLT.  For platforms with integers, prefer b2f(flt(...)).
 */
static void
ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
   } else {
      ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
   }
}

/**
 * Emit SGE.  For platforms with integers, prefer b2f(fge(...)).
 */
static void
ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
   } else {
      ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
   }
}

static void
ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest,
                        nir_fsub(b,
                                 nir_fmul(b,
                                          nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
                                          nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
                                 nir_fmul(b,
                                          nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
                                          nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
                        WRITEMASK_XYZ);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

static void
ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
}

static void
ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
}

static void
ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
}

static void
ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
}

static void
ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_bcsel(b,
                                       nir_flt(b, src[0], nir_imm_float(b, 0.0)),
                                       src[1], src[2]));
   } else {
      ptn_move_dest(b, dest, nir_fcsel(b,
                                       nir_slt(b, src[0], nir_imm_float(b, 0.0)),
                                       src[1], src[2]));
   }
}

static void
ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
}

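/* KIL - Kill Fragment
 *  Discards the fragment if any component of the source is negative.
 */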
static void
ptn_kil(nir_builder *b, nir_ssa_def **src)
{
   nir_ssa_def *cmp = b->shader->options->native_integers ?
      nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
      nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));

   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
   discard->src[0] = nir_src_for_ssa(cmp);
   nir_builder_instr_insert(b, &discard->instr);
}

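/**
 * Translate a Mesa IR texture instruction into a nir_tex_instr, pulling the
 * coordinate and any projector, bias, LOD, or shadow comparator out of the
 * packed vec4 source as required by the opcode and target.
 */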
static void
ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
        struct prog_instruction *prog_inst)
{
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs;

   switch (prog_inst->Opcode) {
   case OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      break;
   case OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   default:
      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
      abort();
   }

   if (prog_inst->TexShadow)
      num_srcs++;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;
   instr->dest_type = nir_type_float;
   instr->is_shadow = prog_inst->TexShadow;
   instr->texture_index = prog_inst->TexSrcUnit;
   instr->sampler_index = prog_inst->TexSrcUnit;

   switch (prog_inst->TexSrcTarget) {
   case TEXTURE_1D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case TEXTURE_2D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TEXTURE_3D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TEXTURE_CUBE_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TEXTURE_RECT_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      break;
   default:
      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
      abort();
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      unreachable("can't reach");
   }

   unsigned src_number = 0;

   instr->src[src_number].src =
      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
                                  instr->coord_components, true));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   if (prog_inst->Opcode == OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (instr->is_shadow) {
      if (instr->coord_components < 3)
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
      else
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));

      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ptn_move_dest(b, dest, &instr->dest.ssa);
}

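/* Mapping from Mesa IR opcodes to the NIR ALU op that implements them
 * directly.  A zero entry means the opcode has no direct ALU translation and
 * is instead handled (or skipped) in ptn_emit_instruction().
 */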
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = 0,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = 0,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = 0,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = 0,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = 0,
   [OPCODE_RCP] = 0,
   [OPCODE_RSQ] = 0,
   [OPCODE_SCS] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SIN] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_XPD] = 0,
};

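/**
 * Translate a single Mesa IR instruction, dispatching either to one of the
 * dedicated helpers above or to the op_trans table for simple 1:1 ALU ops.
 */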
static void
ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   unsigned i;
   const unsigned op = prog_inst->Opcode;

   if (op == OPCODE_END)
      return;

   nir_ssa_def *src[3];
   for (i = 0; i < 3; i++) {
      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
   }
   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
   if (c->error)
      return;

   switch (op) {
   case OPCODE_RSQ:
      ptn_move_dest(b, dest,
                    nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
      break;

   case OPCODE_RCP:
      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_EX2:
      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_LG2:
      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_POW:
      ptn_move_dest(b, dest, nir_fpow(b,
                                      ptn_channel(b, src[0], X),
                                      ptn_channel(b, src[1], X)));
      break;

   case OPCODE_COS:
      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_SIN:
      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_ARL:
      ptn_arl(b, dest, src);
      break;

   case OPCODE_EXP:
      ptn_exp(b, dest, src);
      break;

   case OPCODE_LOG:
      ptn_log(b, dest, src);
      break;

   case OPCODE_LRP:
      ptn_lrp(b, dest, src);
      break;

   case OPCODE_MAD:
      ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
      break;

   case OPCODE_DST:
      ptn_dst(b, dest, src);
      break;

   case OPCODE_LIT:
      ptn_lit(b, dest, src);
      break;

   case OPCODE_XPD:
      ptn_xpd(b, dest, src);
      break;

   case OPCODE_DP2:
      ptn_dp2(b, dest, src);
      break;

   case OPCODE_DP3:
      ptn_dp3(b, dest, src);
      break;

   case OPCODE_DP4:
      ptn_dp4(b, dest, src);
      break;

   case OPCODE_DPH:
      ptn_dph(b, dest, src);
      break;

   case OPCODE_KIL:
      ptn_kil(b, src);
      break;

   case OPCODE_CMP:
      ptn_cmp(b, dest, src);
      break;

   case OPCODE_SCS:
      ptn_scs(b, dest, src);
      break;

   case OPCODE_SLT:
      ptn_slt(b, dest, src);
      break;

   case OPCODE_SGE:
      ptn_sge(b, dest, src);
      break;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
      ptn_tex(b, dest, src, prog_inst);
      break;

   case OPCODE_SWZ:
      /* Extended swizzles were already handled in ptn_get_src(). */
      ptn_alu(b, nir_op_fmov, dest, src);
      break;

   case OPCODE_NOP:
      break;

   default:
      if (op_trans[op] != 0) {
         ptn_alu(b, op_trans[op], dest, src);
      } else {
         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
         abort();
      }
      break;
   }

   if (prog_inst->Saturate) {
      assert(!dest.dest.is_ssa);
      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
   }
}

/**
 * Emits a NIR store intrinsic for each PROGRAM_OUTPUT value, copying it to
 * the corresponding output variable at the end of the shader.
 *
 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
 * written, because there's no output load intrinsic, which means we couldn't
 * handle writemasks.
 */
static void
ptn_add_output_stores(struct ptn_compile *c)
{
   nir_builder *b = &c->build;

   nir_foreach_variable(var, &b->shader->outputs) {
      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
      store->num_components = glsl_get_vector_elements(var->type);
      nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
      store->variables[0] =
         nir_deref_var_create(store, c->output_vars[var->data.location]);

      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
          var->data.location == FRAG_RESULT_DEPTH) {
         /* result.depth has this strange convention of being the .z component of
          * a vec4 with undefined .xyw components.  We resolve it to a scalar, to
          * match GLSL's gl_FragDepth and the expectations of most backends.
          */
         nir_alu_src alu_src = { NIR_SRC_INIT };
         alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]);
         alu_src.swizzle[0] = SWIZZLE_Z;
         store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1));
      } else {
         store->src[0].reg.reg = c->output_regs[var->data.location];
      }
      nir_builder_instr_insert(b, &store->instr);
   }
}

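/**
 * Creates the NIR shader input/output variables along with the registers
 * backing outputs, temporaries, and the address register.
 */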
static void
setup_registers_and_variables(struct ptn_compile *c)
{
   nir_builder *b = &c->build;
   struct nir_shader *shader = b->shader;

   /* Create input variables. */
   const int num_inputs = util_last_bit64(c->prog->info.inputs_read);
   for (int i = 0; i < num_inputs; i++) {
      if (!(c->prog->info.inputs_read & BITFIELD64_BIT(i)))
         continue;

      nir_variable *var =
         nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
                             ralloc_asprintf(shader, "in_%d", i));
      var->data.location = i;
      var->data.index = 0;

      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         if (i == VARYING_SLOT_POS) {
            var->data.origin_upper_left = c->prog->OriginUpperLeft;
            var->data.pixel_center_integer = c->prog->PixelCenterInteger;
         } else if (i == VARYING_SLOT_FOGC) {
            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
             * input variable a float, and create a local containing the
             * full vec4 value.
             */
            var->type = glsl_float_type();

            nir_intrinsic_instr *load_x =
               nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
            load_x->num_components = 1;
            load_x->variables[0] = nir_deref_var_create(load_x, var);
            nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, 32, NULL);
            nir_builder_instr_insert(b, &load_x->instr);

            nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
                                         nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));

            nir_variable *fullvar =
               nir_local_variable_create(b->impl, glsl_vec4_type(),
                                         "fogcoord_tmp");
            nir_intrinsic_instr *store =
               nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
            store->num_components = 4;
            nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
            store->variables[0] = nir_deref_var_create(store, fullvar);
            store->src[0] = nir_src_for_ssa(f001);
            nir_builder_instr_insert(b, &store->instr);

            /* We inserted the real input into the list so the driver has real
             * inputs, but we set c->input_vars[i] to the temporary so we use
             * the splatted value.
             */
            c->input_vars[i] = fullvar;
            continue;
         }
      }

      c->input_vars[i] = var;
   }

   /* Create output registers and variables. */
   int max_outputs = util_last_bit(c->prog->info.outputs_written);
   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);

   for (int i = 0; i < max_outputs; i++) {
      if (!(c->prog->info.outputs_written & BITFIELD64_BIT(i)))
         continue;

      /* Since we can't load from outputs in the IR, we make temporaries
       * for the outputs and emit stores to the real outputs at the end of
       * the shader.
       */
      nir_register *reg = nir_local_reg_create(b->impl);
      reg->num_components = 4;

      nir_variable *var = rzalloc(shader, nir_variable);
      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
         var->type = glsl_float_type();
      else
         var->type = glsl_vec4_type();
      var->data.mode = nir_var_shader_out;
      var->name = ralloc_asprintf(var, "out_%d", i);

      var->data.location = i;
      var->data.index = 0;

      c->output_regs[i] = reg;

      exec_list_push_tail(&shader->outputs, &var->node);
      c->output_vars[i] = var;
   }

   /* Create temporary registers. */
   c->temp_regs = rzalloc_array(c, nir_register *,
                                c->prog->arb.NumTemporaries);

   nir_register *reg;
   for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
      reg = nir_local_reg_create(b->impl);
      if (!reg) {
         c->error = true;
         return;
      }
      reg->num_components = 4;
      c->temp_regs[i] = reg;
   }

   /* Create the address register (for ARB_vertex_program). */
   reg = nir_local_reg_create(b->impl);
   if (!reg) {
      c->error = true;
      return;
   }
   reg->num_components = 1;
   c->addr_reg = reg;
}

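/**
 * Translates a gl_program in Mesa IR form into a newly allocated nir_shader.
 * Returns NULL on allocation failure or translation error.
 */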
struct nir_shader *
prog_to_nir(const struct gl_program *prog,
            const nir_shader_compiler_options *options)
{
   struct ptn_compile *c;
   struct nir_shader *s;
   gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);

   c = rzalloc(NULL, struct ptn_compile);
   if (!c)
      return NULL;
   c->prog = prog;

   nir_builder_init_simple_shader(&c->build, NULL, stage, options);

   /* Copy the shader_info from the gl_program */
   c->build.shader->info = prog->info;

   s = c->build.shader;

   if (prog->Parameters->NumParameters > 0) {
      c->parameters = rzalloc(s, nir_variable);
      c->parameters->type =
         glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
      c->parameters->name = "parameters";
      c->parameters->data.read_only = true;
      c->parameters->data.mode = nir_var_uniform;
      exec_list_push_tail(&s->uniforms, &c->parameters->node);
   }

   setup_registers_and_variables(c);
   if (unlikely(c->error))
      goto fail;

   for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
      ptn_emit_instruction(c, &prog->arb.Instructions[i]);

      if (unlikely(c->error))
         break;
   }

   ptn_add_output_stores(c);

   s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
   s->info.num_textures = util_last_bit(prog->SamplersUsed);
   s->info.num_ubos = 0;
   s->info.num_abos = 0;
   s->info.num_ssbos = 0;
   s->info.num_images = 0;
   s->info.uses_texture_gather = false;
   s->info.clip_distance_array_size = 0;
   s->info.cull_distance_array_size = 0;
   s->info.separate_shader = false;

fail:
   if (c->error) {
      ralloc_free(s);
      s = NULL;
   }
   ralloc_free(c);
   return s;
}