1 /**************************************************************************
2  *
3  * Copyright 2007-2008 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /*
29  * \author
30  * Michal Krol,
31  * Keith Whitwell
32  */
33 
34 #include "pipe/p_compiler.h"
35 #include "pipe/p_context.h"
36 #include "pipe/p_screen.h"
37 #include "pipe/p_shader_tokens.h"
38 #include "pipe/p_state.h"
39 #include "tgsi/tgsi_ureg.h"
40 #include "st_mesa_to_tgsi.h"
41 #include "st_context.h"
42 #include "program/prog_instruction.h"
43 #include "program/prog_parameter.h"
44 #include "util/u_debug.h"
45 #include "util/u_math.h"
46 #include "util/u_memory.h"
47 #include "st_glsl_to_tgsi.h" /* for _mesa_sysval_to_semantic */
48 
49 
/**
 * Bitmask (one bit per gl_register_file value) covering the state-variable,
 * constant and uniform register files.
 */
#define PROGRAM_ANY_CONST            \
   ((1 << PROGRAM_STATE_VAR) |       \
    (1 << PROGRAM_CONSTANT)  |       \
    (1 << PROGRAM_UNIFORM))
53 
54 /**
55  * Intermediate state used during shader translation.
56  */
57 struct st_translate {
58    struct ureg_program *ureg;
59 
60    struct ureg_dst temps[MAX_PROGRAM_TEMPS];
61    struct ureg_src *constants;
62    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
63    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
64    struct ureg_dst address[1];
65    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
66    struct ureg_src systemValues[SYSTEM_VALUE_MAX];
67 
68    const ubyte *inputMapping;
69    const ubyte *outputMapping;
70 
71    unsigned procType;  /**< PIPE_SHADER_VERTEX/FRAGMENT */
72 };
73 
74 
75 /**
76  * Map a Mesa dst register to a TGSI ureg_dst register.
77  */
78 static struct ureg_dst
dst_register(struct st_translate * t,gl_register_file file,GLuint index)79 dst_register(struct st_translate *t, gl_register_file file, GLuint index)
80 {
81    switch(file) {
82    case PROGRAM_UNDEFINED:
83       return ureg_dst_undef();
84 
85    case PROGRAM_TEMPORARY:
86       if (ureg_dst_is_undef(t->temps[index]))
87          t->temps[index] = ureg_DECL_temporary(t->ureg);
88 
89       return t->temps[index];
90 
91    case PROGRAM_OUTPUT:
92       if (t->procType == PIPE_SHADER_VERTEX)
93          assert(index < VARYING_SLOT_MAX);
94       else if (t->procType == PIPE_SHADER_FRAGMENT)
95          assert(index < FRAG_RESULT_MAX);
96       else
97          assert(index < VARYING_SLOT_MAX);
98 
99       assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
100 
101       return t->outputs[t->outputMapping[index]];
102 
103    case PROGRAM_ADDRESS:
104       return t->address[index];
105 
106    default:
107       debug_assert(0);
108       return ureg_dst_undef();
109    }
110 }
111 
112 
113 /**
114  * Map a Mesa src register to a TGSI ureg_src register.
115  */
116 static struct ureg_src
src_register(struct st_translate * t,gl_register_file file,GLint index)117 src_register(struct st_translate *t,
118               gl_register_file file,
119               GLint index)
120 {
121    switch(file) {
122    case PROGRAM_UNDEFINED:
123       return ureg_src_undef();
124 
125    case PROGRAM_TEMPORARY:
126       assert(index >= 0);
127       assert(index < ARRAY_SIZE(t->temps));
128       if (ureg_dst_is_undef(t->temps[index]))
129          t->temps[index] = ureg_DECL_temporary(t->ureg);
130       return ureg_src(t->temps[index]);
131 
132    case PROGRAM_UNIFORM:
133       assert(index >= 0);
134       return t->constants[index];
135    case PROGRAM_STATE_VAR:
136    case PROGRAM_CONSTANT:       /* ie, immediate */
137       if (index < 0)
138          return ureg_DECL_constant(t->ureg, 0);
139       else
140          return t->constants[index];
141 
142    case PROGRAM_INPUT:
143       assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
144       return t->inputs[t->inputMapping[index]];
145 
146    case PROGRAM_OUTPUT:
147       assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
148       return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
149 
150    case PROGRAM_ADDRESS:
151       return ureg_src(t->address[index]);
152 
153    case PROGRAM_SYSTEM_VALUE:
154       assert(index < ARRAY_SIZE(t->systemValues));
155       return t->systemValues[index];
156 
157    default:
158       debug_assert(0);
159       return ureg_src_undef();
160    }
161 }
162 
163 
164 /**
165  * Map mesa texture target to TGSI texture target.
166  */
167 enum tgsi_texture_type
st_translate_texture_target(gl_texture_index textarget,GLboolean shadow)168 st_translate_texture_target(gl_texture_index textarget, GLboolean shadow)
169 {
170    if (shadow) {
171       switch (textarget) {
172       case TEXTURE_1D_INDEX:
173          return TGSI_TEXTURE_SHADOW1D;
174       case TEXTURE_2D_INDEX:
175          return TGSI_TEXTURE_SHADOW2D;
176       case TEXTURE_RECT_INDEX:
177          return TGSI_TEXTURE_SHADOWRECT;
178       case TEXTURE_1D_ARRAY_INDEX:
179          return TGSI_TEXTURE_SHADOW1D_ARRAY;
180       case TEXTURE_2D_ARRAY_INDEX:
181          return TGSI_TEXTURE_SHADOW2D_ARRAY;
182       case TEXTURE_CUBE_INDEX:
183          return TGSI_TEXTURE_SHADOWCUBE;
184       case TEXTURE_CUBE_ARRAY_INDEX:
185          return TGSI_TEXTURE_SHADOWCUBE_ARRAY;
186       default:
187          break;
188       }
189    }
190 
191    switch (textarget) {
192    case TEXTURE_2D_MULTISAMPLE_INDEX:
193       return TGSI_TEXTURE_2D_MSAA;
194    case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
195       return TGSI_TEXTURE_2D_ARRAY_MSAA;
196    case TEXTURE_BUFFER_INDEX:
197       return TGSI_TEXTURE_BUFFER;
198    case TEXTURE_1D_INDEX:
199       return TGSI_TEXTURE_1D;
200    case TEXTURE_2D_INDEX:
201       return TGSI_TEXTURE_2D;
202    case TEXTURE_3D_INDEX:
203       return TGSI_TEXTURE_3D;
204    case TEXTURE_CUBE_INDEX:
205       return TGSI_TEXTURE_CUBE;
206    case TEXTURE_CUBE_ARRAY_INDEX:
207       return TGSI_TEXTURE_CUBE_ARRAY;
208    case TEXTURE_RECT_INDEX:
209       return TGSI_TEXTURE_RECT;
210    case TEXTURE_1D_ARRAY_INDEX:
211       return TGSI_TEXTURE_1D_ARRAY;
212    case TEXTURE_2D_ARRAY_INDEX:
213       return TGSI_TEXTURE_2D_ARRAY;
214    case TEXTURE_EXTERNAL_INDEX:
215       return TGSI_TEXTURE_2D;
216    default:
217       debug_assert(!"unexpected texture target index");
218       return TGSI_TEXTURE_1D;
219    }
220 }
221 
222 
223 /**
224  * Map GLSL base type to TGSI return type.
225  */
226 enum tgsi_return_type
st_translate_texture_type(enum glsl_base_type type)227 st_translate_texture_type(enum glsl_base_type type)
228 {
229    switch (type) {
230    case GLSL_TYPE_INT:
231       return TGSI_RETURN_TYPE_SINT;
232    case GLSL_TYPE_UINT:
233       return TGSI_RETURN_TYPE_UINT;
234    case GLSL_TYPE_FLOAT:
235       return TGSI_RETURN_TYPE_FLOAT;
236    default:
237       assert(!"unexpected texture type");
238       return TGSI_RETURN_TYPE_UNKNOWN;
239    }
240 }
241 
242 
243 /**
244  * Translate a (1 << TEXTURE_x_INDEX) bit into a TGSI_TEXTURE_x enum.
245  */
246 static unsigned
translate_texture_index(GLbitfield texBit,bool shadow)247 translate_texture_index(GLbitfield texBit, bool shadow)
248 {
249    int index = ffs(texBit);
250    assert(index > 0);
251    assert(index - 1 < NUM_TEXTURE_TARGETS);
252    return st_translate_texture_target(index - 1, shadow);
253 }
254 
255 
256 /**
257  * Create a TGSI ureg_dst register from a Mesa dest register.
258  */
259 static struct ureg_dst
translate_dst(struct st_translate * t,const struct prog_dst_register * DstReg,boolean saturate)260 translate_dst(struct st_translate *t,
261               const struct prog_dst_register *DstReg,
262               boolean saturate)
263 {
264    struct ureg_dst dst = dst_register(t, DstReg->File, DstReg->Index);
265 
266    dst = ureg_writemask(dst, DstReg->WriteMask);
267 
268    if (saturate)
269       dst = ureg_saturate(dst);
270 
271    if (DstReg->RelAddr)
272       dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
273 
274    return dst;
275 }
276 
277 
278 /**
279  * Create a TGSI ureg_src register from a Mesa src register.
280  */
281 static struct ureg_src
translate_src(struct st_translate * t,const struct prog_src_register * SrcReg)282 translate_src(struct st_translate *t,
283               const struct prog_src_register *SrcReg)
284 {
285    struct ureg_src src = src_register(t, SrcReg->File, SrcReg->Index);
286 
287    src = ureg_swizzle(src,
288                       GET_SWZ(SrcReg->Swizzle, 0) & 0x3,
289                       GET_SWZ(SrcReg->Swizzle, 1) & 0x3,
290                       GET_SWZ(SrcReg->Swizzle, 2) & 0x3,
291                       GET_SWZ(SrcReg->Swizzle, 3) & 0x3);
292 
293    if (SrcReg->Negate == NEGATE_XYZW)
294       src = ureg_negate(src);
295 
296    if (SrcReg->RelAddr) {
297       src = ureg_src_indirect(src, ureg_src(t->address[0]));
298       if (SrcReg->File != PROGRAM_INPUT &&
299           SrcReg->File != PROGRAM_OUTPUT) {
300          /* If SrcReg->Index was negative, it was set to zero in
301           * src_register().  Reassign it now.  But don't do this
302           * for input/output regs since they get remapped while
303           * const buffers don't.
304           */
305          src.Index = SrcReg->Index;
306       }
307    }
308 
309    return src;
310 }
311 
312 
313 static struct ureg_src
swizzle_4v(struct ureg_src src,const unsigned * swz)314 swizzle_4v(struct ureg_src src, const unsigned *swz)
315 {
316    return ureg_swizzle(src, swz[0], swz[1], swz[2], swz[3]);
317 }
318 
319 
320 /**
321  * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
322  *
323  *   SWZ dst, src.x-y10
324  *
325  * becomes:
326  *
327  *   MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
328  */
329 static void
emit_swz(struct st_translate * t,struct ureg_dst dst,const struct prog_src_register * SrcReg)330 emit_swz(struct st_translate *t,
331          struct ureg_dst dst,
332          const struct prog_src_register *SrcReg)
333 {
334    struct ureg_program *ureg = t->ureg;
335    struct ureg_src src = src_register(t, SrcReg->File, SrcReg->Index);
336 
337    unsigned negate_mask =  SrcReg->Negate;
338 
339    unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
340                         (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
341                         (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
342                         (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
343 
344    unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
345                          (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
346                          (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
347                          (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
348 
349    unsigned negative_one_mask = one_mask & negate_mask;
350    unsigned positive_one_mask = one_mask & ~negate_mask;
351 
352    struct ureg_src imm;
353    unsigned i;
354    unsigned mul_swizzle[4] = {0,0,0,0};
355    unsigned add_swizzle[4] = {0,0,0,0};
356    unsigned src_swizzle[4] = {0,0,0,0};
357    boolean need_add = FALSE;
358    boolean need_mul = FALSE;
359 
360    if (dst.WriteMask == 0)
361       return;
362 
363    /* Is this just a MOV?
364     */
365    if (zero_mask == 0 &&
366        one_mask == 0 &&
367        (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) {
368       ureg_MOV(ureg, dst, translate_src(t, SrcReg));
369       return;
370    }
371 
372 #define IMM_ZERO    0
373 #define IMM_ONE     1
374 #define IMM_NEG_ONE 2
375 
376    imm = ureg_imm3f(ureg, 0, 1, -1);
377 
378    for (i = 0; i < 4; i++) {
379       unsigned bit = 1 << i;
380 
381       if (dst.WriteMask & bit) {
382          if (positive_one_mask & bit) {
383             mul_swizzle[i] = IMM_ZERO;
384             add_swizzle[i] = IMM_ONE;
385             need_add = TRUE;
386          }
387          else if (negative_one_mask & bit) {
388             mul_swizzle[i] = IMM_ZERO;
389             add_swizzle[i] = IMM_NEG_ONE;
390             need_add = TRUE;
391          }
392          else if (zero_mask & bit) {
393             mul_swizzle[i] = IMM_ZERO;
394             add_swizzle[i] = IMM_ZERO;
395             need_add = TRUE;
396          }
397          else {
398             add_swizzle[i] = IMM_ZERO;
399             src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
400             need_mul = TRUE;
401             if (negate_mask & bit) {
402                mul_swizzle[i] = IMM_NEG_ONE;
403             }
404             else {
405                mul_swizzle[i] = IMM_ONE;
406             }
407          }
408       }
409    }
410 
411    if (need_mul && need_add) {
412       ureg_MAD(ureg,
413                dst,
414                swizzle_4v(src, src_swizzle),
415                swizzle_4v(imm, mul_swizzle),
416                swizzle_4v(imm, add_swizzle));
417    }
418    else if (need_mul) {
419       ureg_MUL(ureg,
420                dst,
421                swizzle_4v(src, src_swizzle),
422                swizzle_4v(imm, mul_swizzle));
423    }
424    else if (need_add) {
425       ureg_MOV(ureg,
426                dst,
427                swizzle_4v(imm, add_swizzle));
428    }
429    else {
430       debug_assert(0);
431    }
432 
433 #undef IMM_ZERO
434 #undef IMM_ONE
435 #undef IMM_NEG_ONE
436 }
437 
438 
439 static unsigned
translate_opcode(unsigned op)440 translate_opcode(unsigned op)
441 {
442    switch(op) {
443    case OPCODE_ARL:
444       return TGSI_OPCODE_ARL;
445    case OPCODE_ADD:
446       return TGSI_OPCODE_ADD;
447    case OPCODE_CMP:
448       return TGSI_OPCODE_CMP;
449    case OPCODE_COS:
450       return TGSI_OPCODE_COS;
451    case OPCODE_DP3:
452       return TGSI_OPCODE_DP3;
453    case OPCODE_DP4:
454       return TGSI_OPCODE_DP4;
455    case OPCODE_DST:
456       return TGSI_OPCODE_DST;
457    case OPCODE_EX2:
458       return TGSI_OPCODE_EX2;
459    case OPCODE_EXP:
460       return TGSI_OPCODE_EXP;
461    case OPCODE_FLR:
462       return TGSI_OPCODE_FLR;
463    case OPCODE_FRC:
464       return TGSI_OPCODE_FRC;
465    case OPCODE_KIL:
466       return TGSI_OPCODE_KILL_IF;
467    case OPCODE_LG2:
468       return TGSI_OPCODE_LG2;
469    case OPCODE_LOG:
470       return TGSI_OPCODE_LOG;
471    case OPCODE_LIT:
472       return TGSI_OPCODE_LIT;
473    case OPCODE_LRP:
474       return TGSI_OPCODE_LRP;
475    case OPCODE_MAD:
476       return TGSI_OPCODE_MAD;
477    case OPCODE_MAX:
478       return TGSI_OPCODE_MAX;
479    case OPCODE_MIN:
480       return TGSI_OPCODE_MIN;
481    case OPCODE_MOV:
482       return TGSI_OPCODE_MOV;
483    case OPCODE_MUL:
484       return TGSI_OPCODE_MUL;
485    case OPCODE_POW:
486       return TGSI_OPCODE_POW;
487    case OPCODE_RCP:
488       return TGSI_OPCODE_RCP;
489    case OPCODE_SGE:
490       return TGSI_OPCODE_SGE;
491    case OPCODE_SIN:
492       return TGSI_OPCODE_SIN;
493    case OPCODE_SLT:
494       return TGSI_OPCODE_SLT;
495    case OPCODE_TEX:
496       return TGSI_OPCODE_TEX;
497    case OPCODE_TXB:
498       return TGSI_OPCODE_TXB;
499    case OPCODE_TXP:
500       return TGSI_OPCODE_TXP;
501    case OPCODE_END:
502       return TGSI_OPCODE_END;
503    default:
504       debug_assert(0);
505       return TGSI_OPCODE_NOP;
506    }
507 }
508 
509 
510 static void
compile_instruction(struct gl_context * ctx,struct st_translate * t,const struct prog_instruction * inst)511 compile_instruction(struct gl_context *ctx,
512                     struct st_translate *t,
513                     const struct prog_instruction *inst)
514 {
515    struct ureg_program *ureg = t->ureg;
516    GLuint i;
517    struct ureg_dst dst[1] = { { 0 } };
518    struct ureg_src src[4];
519    unsigned num_dst;
520    unsigned num_src;
521 
522    num_dst = _mesa_num_inst_dst_regs(inst->Opcode);
523    num_src = _mesa_num_inst_src_regs(inst->Opcode);
524 
525    if (num_dst)
526       dst[0] = translate_dst(t, &inst->DstReg, inst->Saturate);
527 
528    for (i = 0; i < num_src; i++)
529       src[i] = translate_src(t, &inst->SrcReg[i]);
530 
531    switch(inst->Opcode) {
532    case OPCODE_SWZ:
533       emit_swz(t, dst[0], &inst->SrcReg[0]);
534       return;
535 
536    case OPCODE_TEX:
537    case OPCODE_TXB:
538    case OPCODE_TXP:
539       src[num_src++] = t->samplers[inst->TexSrcUnit];
540       ureg_tex_insn(ureg,
541                     translate_opcode(inst->Opcode),
542                     dst, num_dst,
543                     st_translate_texture_target(inst->TexSrcTarget,
544                                                 inst->TexShadow),
545                     TGSI_RETURN_TYPE_FLOAT,
546                     NULL, 0,
547                     src, num_src);
548       return;
549 
550    case OPCODE_SCS:
551       ureg_COS(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_X),
552                ureg_scalar(src[0], TGSI_SWIZZLE_X));
553       ureg_SIN(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_Y),
554                ureg_scalar(src[0], TGSI_SWIZZLE_X));
555       break;
556 
557    case OPCODE_XPD: {
558       struct ureg_dst tmp = ureg_DECL_temporary(ureg);
559 
560       ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
561                ureg_swizzle(src[0], TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
562                             TGSI_SWIZZLE_X, 0),
563                ureg_swizzle(src[1], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
564                             TGSI_SWIZZLE_Y, 0));
565       ureg_MAD(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ),
566                ureg_swizzle(src[0], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
567                             TGSI_SWIZZLE_Y, 0),
568                ureg_negate(ureg_swizzle(src[1], TGSI_SWIZZLE_Y,
569                                         TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
570                ureg_src(tmp));
571       break;
572    }
573 
574    case OPCODE_RSQ:
575       ureg_RSQ(ureg, dst[0], ureg_abs(src[0]));
576       break;
577 
578    case OPCODE_ABS:
579       ureg_MOV(ureg, dst[0], ureg_abs(src[0]));
580       break;
581 
582    case OPCODE_SUB:
583       ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1]));
584       break;
585 
586    case OPCODE_DPH: {
587       struct ureg_dst temp = ureg_DECL_temporary(ureg);
588 
589       /* DPH = DP4(src0, src1) where src0.w = 1. */
590       ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_XYZ), src[0]);
591       ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_W),
592                ureg_imm1f(ureg, 1));
593       ureg_DP4(ureg, dst[0], ureg_src(temp), src[1]);
594       break;
595    }
596 
597    default:
598       ureg_insn(ureg,
599                  translate_opcode(inst->Opcode),
600                  dst, num_dst,
601                  src, num_src, 0);
602       break;
603    }
604 }
605 
606 
607 /**
608  * Emit the TGSI instructions for inverting and adjusting WPOS.
609  * This code is unavoidable because it also depends on whether
610  * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
611  */
612 static void
emit_wpos_adjustment(struct gl_context * ctx,struct st_translate * t,const struct gl_program * program,boolean invert,GLfloat adjX,GLfloat adjY[2])613 emit_wpos_adjustment(struct gl_context *ctx,
614                      struct st_translate *t,
615                      const struct gl_program *program,
616                      boolean invert,
617                      GLfloat adjX, GLfloat adjY[2])
618 {
619    struct ureg_program *ureg = t->ureg;
620 
621    /* Fragment program uses fragment position input.
622     * Need to replace instances of INPUT[WPOS] with temp T
623     * where T = INPUT[WPOS] by y is inverted.
624     */
625    static const gl_state_index wposTransformState[STATE_LENGTH]
626       = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 };
627 
628    /* XXX: note we are modifying the incoming shader here!  Need to
629     * do this before emitting the constant decls below, or this
630     * will be missed:
631     */
632    unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
633                                                        wposTransformState);
634 
635    struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
636    struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
637    struct ureg_src *wpos =
638       ctx->Const.GLSLFragCoordIsSysVal ?
639          &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
640          &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
641    struct ureg_src wpos_input = *wpos;
642 
643    /* First, apply the coordinate shift: */
644    if (adjX || adjY[0] || adjY[1]) {
645       if (adjY[0] != adjY[1]) {
646          /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
647           * depending on whether inversion is actually going to be applied
648           * or not, which is determined by testing against the inversion
649           * state variable used below, which will be either +1 or -1.
650           */
651          struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);
652 
653          ureg_CMP(ureg, adj_temp,
654                   ureg_scalar(wpostrans, invert ? 2 : 0),
655                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
656                   ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
657          ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
658       } else {
659          ureg_ADD(ureg, wpos_temp, wpos_input,
660                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
661       }
662       wpos_input = ureg_src(wpos_temp);
663    } else {
664       /* MOV wpos_temp, input[wpos]
665        */
666       ureg_MOV(ureg, wpos_temp, wpos_input);
667    }
668 
669    /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
670     * inversion/identity, or the other way around if we're drawing to an FBO.
671     */
672    if (invert) {
673       /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
674        */
675       ureg_MAD(ureg,
676                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
677                 wpos_input,
678                 ureg_scalar(wpostrans, 0),
679                 ureg_scalar(wpostrans, 1));
680    } else {
681       /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
682        */
683       ureg_MAD(ureg,
684                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
685                 wpos_input,
686                 ureg_scalar(wpostrans, 2),
687                 ureg_scalar(wpostrans, 3));
688    }
689 
690    /* Use wpos_temp as position input from here on:
691     */
692    *wpos = ureg_src(wpos_temp);
693 }
694 
695 
696 /**
697  * Emit fragment position/coordinate code.
698  */
699 static void
emit_wpos(struct st_context * st,struct st_translate * t,const struct gl_program * program,struct ureg_program * ureg)700 emit_wpos(struct st_context *st,
701           struct st_translate *t,
702           const struct gl_program *program,
703           struct ureg_program *ureg)
704 {
705    struct pipe_screen *pscreen = st->pipe->screen;
706    GLfloat adjX = 0.0f;
707    GLfloat adjY[2] = { 0.0f, 0.0f };
708    boolean invert = FALSE;
709 
710    /* Query the pixel center conventions supported by the pipe driver and set
711     * adjX, adjY to help out if it cannot handle the requested one internally.
712     *
713     * The bias of the y-coordinate depends on whether y-inversion takes place
714     * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
715     * drawing to an FBO (causes additional inversion), and whether the pipe
716     * driver origin and the requested origin differ (the latter condition is
717     * stored in the 'invert' variable).
718     *
719     * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
720     *
721     * center shift only:
722     * i -> h: +0.5
723     * h -> i: -0.5
724     *
725     * inversion only:
726     * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
727     * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
728     * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
729     * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
730     *
731     * inversion and center shift:
732     * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
733     * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
734     * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
735     * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
736     */
737    if (program->OriginUpperLeft) {
738       /* Fragment shader wants origin in upper-left */
739       if (pscreen->get_param(pscreen,
740                              PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
741          /* the driver supports upper-left origin */
742       }
743       else if (pscreen->get_param(pscreen,
744                                   PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
745          /* the driver supports lower-left origin, need to invert Y */
746          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
747                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
748          invert = TRUE;
749       }
750       else
751          assert(0);
752    }
753    else {
754       /* Fragment shader wants origin in lower-left */
755       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
756          /* the driver supports lower-left origin */
757          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
758                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
759       else if (pscreen->get_param(pscreen,
760                                   PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
761          /* the driver supports upper-left origin, need to invert Y */
762          invert = TRUE;
763       else
764          assert(0);
765    }
766 
767    if (program->PixelCenterInteger) {
768       /* Fragment shader wants pixel center integer */
769       if (pscreen->get_param(pscreen,
770                              PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
771          /* the driver supports pixel center integer */
772          adjY[1] = 1.0f;
773          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
774                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
775       }
776       else if (pscreen->get_param(pscreen,
777                             PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
778          /* the driver supports pixel center half integer, need to bias X,Y */
779          adjX = -0.5f;
780          adjY[0] = -0.5f;
781          adjY[1] = 0.5f;
782       }
783       else
784          assert(0);
785    }
786    else {
787       /* Fragment shader wants pixel center half integer */
788       if (pscreen->get_param(pscreen,
789                           PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
790          /* the driver supports pixel center half integer */
791       }
792       else if (pscreen->get_param(pscreen,
793                                PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
794          /* the driver supports pixel center integer, need to bias X,Y */
795          adjX = adjY[0] = adjY[1] = 0.5f;
796          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
797                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
798       }
799       else
800          assert(0);
801    }
802 
803    /* we invert after adjustment so that we avoid the MOV to temporary,
804     * and reuse the adjustment ADD instead */
805    emit_wpos_adjustment(st->ctx, t, program, invert, adjX, adjY);
806 }
807 
808 
809 /**
810  * Translate Mesa program to TGSI format.
811  * \param program  the program to translate
812  * \param numInputs  number of input registers used
813  * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
814  *                      input indexes
815  * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
816  * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
817  *                            each input
818  * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
819  * \param numOutputs  number of output registers used
820  * \param outputMapping  maps Mesa fragment program outputs to TGSI
821  *                       generic outputs
822  * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
823  * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
824  *                             each output
825  *
826  * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
827  */
828 enum pipe_error
st_translate_mesa_program(struct gl_context * ctx,uint procType,struct ureg_program * ureg,const struct gl_program * program,GLuint numInputs,const ubyte inputMapping[],const ubyte inputSemanticName[],const ubyte inputSemanticIndex[],const ubyte interpMode[],GLuint numOutputs,const ubyte outputMapping[],const ubyte outputSemanticName[],const ubyte outputSemanticIndex[])829 st_translate_mesa_program(struct gl_context *ctx,
830                           uint procType,
831                           struct ureg_program *ureg,
832                           const struct gl_program *program,
833                           GLuint numInputs,
834                           const ubyte inputMapping[],
835                           const ubyte inputSemanticName[],
836                           const ubyte inputSemanticIndex[],
837                           const ubyte interpMode[],
838                           GLuint numOutputs,
839                           const ubyte outputMapping[],
840                           const ubyte outputSemanticName[],
841                           const ubyte outputSemanticIndex[])
842 {
843    struct st_translate translate, *t;
844    unsigned i;
845    enum pipe_error ret = PIPE_OK;
846 
847    assert(numInputs <= ARRAY_SIZE(t->inputs));
848    assert(numOutputs <= ARRAY_SIZE(t->outputs));
849 
850    t = &translate;
851    memset(t, 0, sizeof *t);
852 
853    t->procType = procType;
854    t->inputMapping = inputMapping;
855    t->outputMapping = outputMapping;
856    t->ureg = ureg;
857 
858    /*_mesa_print_program(program);*/
859 
860    /*
861     * Declare input attributes.
862     */
863    if (procType == PIPE_SHADER_FRAGMENT) {
864       for (i = 0; i < numInputs; i++) {
865          t->inputs[i] = ureg_DECL_fs_input(ureg,
866                                            inputSemanticName[i],
867                                            inputSemanticIndex[i],
868                                            interpMode[i]);
869       }
870 
871       if (program->info.inputs_read & VARYING_BIT_POS) {
872          /* Must do this after setting up t->inputs, and before
873           * emitting constant references, below:
874           */
875          emit_wpos(st_context(ctx), t, program, ureg);
876       }
877 
878       /*
879        * Declare output attributes.
880        */
881       for (i = 0; i < numOutputs; i++) {
882          switch (outputSemanticName[i]) {
883          case TGSI_SEMANTIC_POSITION:
884             t->outputs[i] = ureg_DECL_output(ureg,
885                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
886                                              outputSemanticIndex[i]);
887 
888             t->outputs[i] = ureg_writemask(t->outputs[i],
889                                            TGSI_WRITEMASK_Z);
890             break;
891          case TGSI_SEMANTIC_STENCIL:
892             t->outputs[i] = ureg_DECL_output(ureg,
893                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
894                                              outputSemanticIndex[i]);
895             t->outputs[i] = ureg_writemask(t->outputs[i],
896                                            TGSI_WRITEMASK_Y);
897             break;
898          case TGSI_SEMANTIC_COLOR:
899             t->outputs[i] = ureg_DECL_output(ureg,
900                                              TGSI_SEMANTIC_COLOR,
901                                              outputSemanticIndex[i]);
902             break;
903          default:
904             debug_assert(0);
905             return 0;
906          }
907       }
908    }
909    else if (procType == PIPE_SHADER_GEOMETRY) {
910       for (i = 0; i < numInputs; i++) {
911          t->inputs[i] = ureg_DECL_input(ureg,
912                                         inputSemanticName[i],
913                                         inputSemanticIndex[i], 0, 1);
914       }
915 
916       for (i = 0; i < numOutputs; i++) {
917          t->outputs[i] = ureg_DECL_output(ureg,
918                                           outputSemanticName[i],
919                                           outputSemanticIndex[i]);
920       }
921    }
922    else {
923       assert(procType == PIPE_SHADER_VERTEX);
924 
925       for (i = 0; i < numInputs; i++) {
926          t->inputs[i] = ureg_DECL_vs_input(ureg, i);
927       }
928 
929       for (i = 0; i < numOutputs; i++) {
930          t->outputs[i] = ureg_DECL_output(ureg,
931                                           outputSemanticName[i],
932                                           outputSemanticIndex[i]);
933          if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
934             /* force register to contain a fog coordinate in the
935              * form (F, 0, 0, 1).
936              */
937             ureg_MOV(ureg,
938                      ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
939                      ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
940             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
941          }
942       }
943    }
944 
945    /* Declare address register.
946     */
947    if (program->arb.NumAddressRegs > 0) {
948       debug_assert(program->arb.NumAddressRegs == 1);
949       t->address[0] = ureg_DECL_address(ureg);
950    }
951 
952    /* Declare misc input registers
953     */
954    GLbitfield sysInputs = program->info.system_values_read;
955    for (i = 0; sysInputs; i++) {
956       if (sysInputs & (1 << i)) {
957          unsigned semName = _mesa_sysval_to_semantic(i);
958 
959          t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
960 
961          if (semName == TGSI_SEMANTIC_INSTANCEID ||
962              semName == TGSI_SEMANTIC_VERTEXID) {
963             /* From Gallium perspective, these system values are always
964              * integer, and require native integer support.  However, if
965              * native integer is supported on the vertex stage but not the
966              * pixel stage (e.g, i915g + draw), Mesa will generate IR that
967              * assumes these system values are floats. To resolve the
968              * inconsistency, we insert a U2F.
969              */
970             struct st_context *st = st_context(ctx);
971             struct pipe_screen *pscreen = st->pipe->screen;
972             assert(procType == PIPE_SHADER_VERTEX);
973             assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX,
974                    PIPE_SHADER_CAP_INTEGERS));
975             (void) pscreen;  /* silence non-debug build warnings */
976             if (!ctx->Const.NativeIntegers) {
977                struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
978                ureg_U2F(t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X),
979                         t->systemValues[i]);
980                t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
981             }
982          }
983 
984          if (procType == PIPE_SHADER_FRAGMENT &&
985              semName == TGSI_SEMANTIC_POSITION)
986             emit_wpos(st_context(ctx), t, program, ureg);
987 
988           sysInputs &= ~(1 << i);
989       }
990    }
991 
992    if (program->arb.IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
993       /* If temps are accessed with indirect addressing, declare temporaries
994        * in sequential order.  Else, we declare them on demand elsewhere.
995        */
996       for (i = 0; i < program->arb.NumTemporaries; i++) {
997          /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
998          t->temps[i] = ureg_DECL_temporary(t->ureg);
999       }
1000    }
1001 
1002    /* Emit constants and immediates.  Mesa uses a single index space
1003     * for these, so we put all the translated regs in t->constants.
1004     */
1005    if (program->Parameters) {
1006       t->constants = calloc(program->Parameters->NumParameters,
1007                              sizeof t->constants[0]);
1008       if (t->constants == NULL) {
1009          ret = PIPE_ERROR_OUT_OF_MEMORY;
1010          goto out;
1011       }
1012 
1013       for (i = 0; i < program->Parameters->NumParameters; i++) {
1014          switch (program->Parameters->Parameters[i].Type) {
1015          case PROGRAM_STATE_VAR:
1016          case PROGRAM_UNIFORM:
1017             t->constants[i] = ureg_DECL_constant(ureg, i);
1018             break;
1019 
1020             /* Emit immediates only when there's no indirect addressing of
1021              * the const buffer.
1022              * FIXME: Be smarter and recognize param arrays:
1023              * indirect addressing is only valid within the referenced
1024              * array.
1025              */
1026          case PROGRAM_CONSTANT:
1027             if (program->arb.IndirectRegisterFiles & PROGRAM_ANY_CONST)
1028                t->constants[i] = ureg_DECL_constant(ureg, i);
1029             else
1030                t->constants[i] =
1031                   ureg_DECL_immediate(ureg,
1032                                       (const float *)
1033                                       program->Parameters->ParameterValues[i],
1034                                       4);
1035             break;
1036          default:
1037             break;
1038          }
1039       }
1040    }
1041 
1042    /* texture samplers */
1043    for (i = 0;
1044         i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
1045       if (program->SamplersUsed & (1u << i)) {
1046          unsigned target =
1047             translate_texture_index(program->TexturesUsed[i],
1048                                     !!(program->ShadowSamplers & (1 << i)));
1049          t->samplers[i] = ureg_DECL_sampler(ureg, i);
1050          ureg_DECL_sampler_view(ureg, i, target,
1051                                 TGSI_RETURN_TYPE_FLOAT,
1052                                 TGSI_RETURN_TYPE_FLOAT,
1053                                 TGSI_RETURN_TYPE_FLOAT,
1054                                 TGSI_RETURN_TYPE_FLOAT);
1055 
1056       }
1057    }
1058 
1059    /* Emit each instruction in turn:
1060     */
1061    for (i = 0; i < program->arb.NumInstructions; i++)
1062       compile_instruction(ctx, t, &program->arb.Instructions[i]);
1063 
1064 out:
1065    free(t->constants);
1066    return ret;
1067 }
1068