1 /*
2  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30 
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33 
34 #include "tgsi_lowering.h"
35 
36 struct tgsi_lowering_context {
37    struct tgsi_transform_context base;
38    const struct tgsi_lowering_config *config;
39    struct tgsi_shader_info *info;
40    unsigned two_side_colors;
41    unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
42    unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
43    int face_idx;
44    unsigned numtmp;
45    struct {
46       struct tgsi_full_src_register src;
47       struct tgsi_full_dst_register dst;
48    } tmp[2];
49 #define A 0
50 #define B 1
51    struct tgsi_full_src_register imm;
52    int emitted_decls;
53    unsigned saturate;
54 };
55 
/* Downcast a transform context to the lowering context that contains it.
 * This is a plain pointer cast, so it relies on the transform context being
 * the first member ("base") of struct tgsi_lowering_context.
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *ctx = (struct tgsi_lowering_context *)tctx;

   return ctx;
}
61 
62 /*
63  * Utility helpers:
64  */
65 
66 static void
reg_dst(struct tgsi_full_dst_register * dst,const struct tgsi_full_dst_register * orig_dst,unsigned wrmask)67 reg_dst(struct tgsi_full_dst_register *dst,
68 	const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
69 {
70    *dst = *orig_dst;
71    dst->Register.WriteMask &= wrmask;
72    assert(dst->Register.WriteMask);
73 }
74 
75 static inline void
get_swiz(unsigned * swiz,const struct tgsi_src_register * src)76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
77 {
78    swiz[0] = src->SwizzleX;
79    swiz[1] = src->SwizzleY;
80    swiz[2] = src->SwizzleZ;
81    swiz[3] = src->SwizzleW;
82 }
83 
84 static void
reg_src(struct tgsi_full_src_register * src,const struct tgsi_full_src_register * orig_src,unsigned sx,unsigned sy,unsigned sz,unsigned sw)85 reg_src(struct tgsi_full_src_register *src,
86 	const struct tgsi_full_src_register *orig_src,
87 	unsigned sx, unsigned sy, unsigned sz, unsigned sw)
88 {
89    unsigned swiz[4];
90    get_swiz(swiz, &orig_src->Register);
91    *src = *orig_src;
92    src->Register.SwizzleX = swiz[sx];
93    src->Register.SwizzleY = swiz[sy];
94    src->Register.SwizzleZ = swiz[sz];
95    src->Register.SwizzleW = swiz[sw];
96 }
97 
/* Placeholder selector for channels whose value is never read, so that
 * SWIZ(_, Y, _, _) and similar spellings expand to something valid.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */

/* Expand four channel letters (X, Y, Z, W or _) into the four
 * comma-separated TGSI_SWIZZLE_* selectors taken by reg_src().
 */
#define SWIZ(x, y, z, w) \
   TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
   TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
101 
102 /*
103  * if (dst.x aliases src.x) {
104  *   MOV tmpA.x, src.x
105  *   src = tmpA
106  * }
107  * COS dst.x, src.x
108  * SIN dst.y, src.x
109  * MOV dst.zw, imm{0.0, 1.0}
110  */
111 static bool
aliases(const struct tgsi_full_dst_register * dst,unsigned dst_mask,const struct tgsi_full_src_register * src,unsigned src_mask)112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
113 	const struct tgsi_full_src_register *src, unsigned src_mask)
114 {
115    if ((dst->Register.File == src->Register.File) &&
116        (dst->Register.Index == src->Register.Index)) {
117       unsigned i, actual_mask = 0;
118       unsigned swiz[4];
119       get_swiz(swiz, &src->Register);
120       for (i = 0; i < 4; i++)
121          if (src_mask & (1 << i))
122             actual_mask |= (1 << swiz[i]);
123       if (actual_mask & dst_mask)
124          return true;
125    }
126    return false;
127 }
128 
129 static void
create_mov(struct tgsi_transform_context * tctx,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src,unsigned mask,unsigned saturate)130 create_mov(struct tgsi_transform_context *tctx,
131            const struct tgsi_full_dst_register *dst,
132            const struct tgsi_full_src_register *src,
133            unsigned mask, unsigned saturate)
134 {
135    struct tgsi_full_instruction new_inst;
136 
137    new_inst = tgsi_default_full_instruction();
138    new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139    new_inst.Instruction.Saturate = saturate;
140    new_inst.Instruction.NumDstRegs = 1;
141    reg_dst(&new_inst.Dst[0], dst, mask);
142    new_inst.Instruction.NumSrcRegs = 1;
143    reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
144    tctx->emit_instruction(tctx, &new_inst);
145 }
146 
/* Token accounting used to estimate how much a lowering grows the shader.
 * The estimate is deliberately pessimistic: a removed instruction is
 * assumed to have no ADDR[] (or anything else that adds tokens per
 * src/dst), while every inserted instruction is assumed to have it.
 *
 * OINST() - tokens freed by removing an old instruction
 *    1         : instruction itself
 *    1         : dst
 *    1 * nargs : srcN
 *
 * NINST() - tokens consumed by a new instruction
 *    1         : instruction itself
 *    2         : dst
 *    2 * nargs : srcN
 */

#define OINST(nargs)  (1 /* insn */ + 1 /* dst */ + 1 * (nargs))
#define NINST(nargs)  (1 /* insn */ + 2 /* dst */ + 2 * (nargs))
165 
166 /*
167  * Lowering Translators:
168  */
169 
170 /* DST - Distance Vector
171  *   dst.x = 1.0
172  *   dst.y = src0.y \times src1.y
173  *   dst.z = src0.z
174  *   dst.w = src1.w
175  *
176  * ; note: could be more clever and use just a single temp
177  * ;       if I was clever enough to re-write the swizzles.
178  * ; needs: 2 tmp, imm{1.0}
179  * if (dst.y aliases src0.z) {
180  *   MOV tmpA.yz, src0.yz
181  *   src0 = tmpA
182  * }
183  * if (dst.yz aliases src1.w) {
184  *   MOV tmpB.yw, src1.yw
185  *   src1 = tmpB
186  * }
187  * MUL dst.y, src0.y, src1.y
188  * MOV dst.z, src0.z
189  * MOV dst.w, src1.w
190  * MOV dst.x, imm{1.0}
191  */
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 		NINST(1) + NINST(1) - OINST(2))
194 #define DST_TMP  2
195 static void
transform_dst(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)196 transform_dst(struct tgsi_transform_context *tctx,
197               struct tgsi_full_instruction *inst)
198 {
199    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
200    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
201    struct tgsi_full_src_register *src0 = &inst->Src[0];
202    struct tgsi_full_src_register *src1 = &inst->Src[1];
203    struct tgsi_full_instruction new_inst;
204 
205    if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
206       create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
207       src0 = &ctx->tmp[A].src;
208    }
209 
210    if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
211       create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
212       src1 = &ctx->tmp[B].src;
213    }
214 
215    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
216       /* MUL dst.y, src0.y, src1.y */
217       new_inst = tgsi_default_full_instruction();
218       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
219       new_inst.Instruction.NumDstRegs = 1;
220       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
221       new_inst.Instruction.NumSrcRegs = 2;
222       reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
223       reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
224       tctx->emit_instruction(tctx, &new_inst);
225    }
226 
227    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
228       /* MOV dst.z, src0.z */
229       new_inst = tgsi_default_full_instruction();
230       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
231       new_inst.Instruction.NumDstRegs = 1;
232       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
233       new_inst.Instruction.NumSrcRegs = 1;
234       reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
235       tctx->emit_instruction(tctx, &new_inst);
236    }
237 
238    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
239       /* MOV dst.w, src1.w */
240       new_inst = tgsi_default_full_instruction();
241       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
242       new_inst.Instruction.NumDstRegs = 1;
243       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
244       new_inst.Instruction.NumSrcRegs = 1;
245       reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
246       tctx->emit_instruction(tctx, &new_inst);
247    }
248 
249    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
250       /* MOV dst.x, imm{1.0} */
251       new_inst = tgsi_default_full_instruction();
252       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
253       new_inst.Instruction.NumDstRegs = 1;
254       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
255       new_inst.Instruction.NumSrcRegs = 1;
256       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
257       tctx->emit_instruction(tctx, &new_inst);
258    }
259 }
260 
261 /* LRP - Linear Interpolate
262  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
263  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
264  *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
265  *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
266  *
267  * This becomes: src0 \times src1 + src2 - src0 \times src2, which
268  * can then become: src0 \times src1 - (src0 \times src2 - src2)
269  *
270  * ; needs: 1 tmp
271  * MAD tmpA, src0, src2, -src2
272  * MAD dst, src0, src1, -tmpA
273  */
274 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
275 #define LRP_TMP  1
276 static void
transform_lrp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)277 transform_lrp(struct tgsi_transform_context *tctx,
278               struct tgsi_full_instruction *inst)
279 {
280    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
281    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
282    struct tgsi_full_src_register *src0 = &inst->Src[0];
283    struct tgsi_full_src_register *src1 = &inst->Src[1];
284    struct tgsi_full_src_register *src2 = &inst->Src[2];
285    struct tgsi_full_instruction new_inst;
286 
287    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
288       /* MAD tmpA, src0, src2, -src2 */
289       new_inst = tgsi_default_full_instruction();
290       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
291       new_inst.Instruction.NumDstRegs = 1;
292       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
293       new_inst.Instruction.NumSrcRegs = 3;
294       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
295       reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
296       reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
297       new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
298       tctx->emit_instruction(tctx, &new_inst);
299 
300       /* MAD dst, src0, src1, -tmpA */
301       new_inst = tgsi_default_full_instruction();
302       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
303       new_inst.Instruction.NumDstRegs = 1;
304       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
305       new_inst.Instruction.NumSrcRegs = 3;
306       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
307       reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
308       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
309       new_inst.Src[2].Register.Negate = true;
310       tctx->emit_instruction(tctx, &new_inst);
311    }
312 }
313 
314 /* FRC - Fraction
315  *  dst.x = src.x - \lfloor src.x\rfloor
316  *  dst.y = src.y - \lfloor src.y\rfloor
317  *  dst.z = src.z - \lfloor src.z\rfloor
318  *  dst.w = src.w - \lfloor src.w\rfloor
319  *
320  * ; needs: 1 tmp
321  * FLR tmpA, src
322  * SUB dst, src, tmpA
323  */
324 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
325 #define FRC_TMP  1
326 static void
transform_frc(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)327 transform_frc(struct tgsi_transform_context *tctx,
328               struct tgsi_full_instruction *inst)
329 {
330    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
331    struct tgsi_full_dst_register *dst = &inst->Dst[0];
332    struct tgsi_full_src_register *src = &inst->Src[0];
333    struct tgsi_full_instruction new_inst;
334 
335    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
336       /* FLR tmpA, src */
337       new_inst = tgsi_default_full_instruction();
338       new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
339       new_inst.Instruction.NumDstRegs = 1;
340       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
341       new_inst.Instruction.NumSrcRegs = 1;
342       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
343       tctx->emit_instruction(tctx, &new_inst);
344 
345       /* SUB dst, src, tmpA */
346       new_inst = tgsi_default_full_instruction();
347       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
348       new_inst.Instruction.NumDstRegs = 1;
349       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
350       new_inst.Instruction.NumSrcRegs = 2;
351       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
352       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
353       new_inst.Src[1].Register.Negate = 1;
354       tctx->emit_instruction(tctx, &new_inst);
355    }
356 }
357 
358 /* POW - Power
359  *  dst.x = src0.x^{src1.x}
360  *  dst.y = src0.x^{src1.x}
361  *  dst.z = src0.x^{src1.x}
362  *  dst.w = src0.x^{src1.x}
363  *
364  * ; needs: 1 tmp
365  * LG2 tmpA.x, src0.x
366  * MUL tmpA.x, src1.x, tmpA.x
367  * EX2 dst, tmpA.x
368  */
369 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
370 #define POW_TMP  1
371 static void
transform_pow(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)372 transform_pow(struct tgsi_transform_context *tctx,
373               struct tgsi_full_instruction *inst)
374 {
375    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
376    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
377    struct tgsi_full_src_register *src0 = &inst->Src[0];
378    struct tgsi_full_src_register *src1 = &inst->Src[1];
379    struct tgsi_full_instruction new_inst;
380 
381    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
382       /* LG2 tmpA.x, src0.x */
383       new_inst = tgsi_default_full_instruction();
384       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
385       new_inst.Instruction.NumDstRegs = 1;
386       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
387       new_inst.Instruction.NumSrcRegs = 1;
388       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
389       tctx->emit_instruction(tctx, &new_inst);
390 
391       /* MUL tmpA.x, src1.x, tmpA.x */
392       new_inst = tgsi_default_full_instruction();
393       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
394       new_inst.Instruction.NumDstRegs = 1;
395       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
396       new_inst.Instruction.NumSrcRegs = 2;
397       reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
398       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
399       tctx->emit_instruction(tctx, &new_inst);
400 
401       /* EX2 dst, tmpA.x */
402       new_inst = tgsi_default_full_instruction();
403       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
404       new_inst.Instruction.NumDstRegs = 1;
405       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
406       new_inst.Instruction.NumSrcRegs = 1;
407       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
408       tctx->emit_instruction(tctx, &new_inst);
409    }
410 }
411 
412 /* LIT - Light Coefficients
413  *  dst.x = 1.0
414  *  dst.y = max(src.x, 0.0)
415  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
416  *  dst.w = 1.0
417  *
418  * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
419  * MAX tmpA.xy, src.xy, imm{0.0}
420  * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
421  * LG2 tmpA.y, tmpA.y
422  * MUL tmpA.y, tmpA.z, tmpA.y
423  * EX2 tmpA.y, tmpA.y
424  * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
425  * MOV dst.yz, tmpA.xy
426  * MOV dst.xw, imm{1.0}
427  */
428 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
429 		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
430 #define LIT_TMP  1
431 static void
transform_lit(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)432 transform_lit(struct tgsi_transform_context *tctx,
433               struct tgsi_full_instruction *inst)
434 {
435    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
436    struct tgsi_full_dst_register *dst = &inst->Dst[0];
437    struct tgsi_full_src_register *src = &inst->Src[0];
438    struct tgsi_full_instruction new_inst;
439 
440    if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
441       /* MAX tmpA.xy, src.xy, imm{0.0} */
442       new_inst = tgsi_default_full_instruction();
443       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
444       new_inst.Instruction.NumDstRegs = 1;
445       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
446       new_inst.Instruction.NumSrcRegs = 2;
447       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
448       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
449       tctx->emit_instruction(tctx, &new_inst);
450 
451       /* MIN tmpA.z, src.w, imm{128.0} */
452       new_inst = tgsi_default_full_instruction();
453       new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
454       new_inst.Instruction.NumDstRegs = 1;
455       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
456       new_inst.Instruction.NumSrcRegs = 2;
457       reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
458       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
459       tctx->emit_instruction(tctx, &new_inst);
460 
461       /* MAX tmpA.z, tmpA.z, -imm{128.0} */
462       new_inst = tgsi_default_full_instruction();
463       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
464       new_inst.Instruction.NumDstRegs = 1;
465       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
466       new_inst.Instruction.NumSrcRegs = 2;
467       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
468       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
469       new_inst.Src[1].Register.Negate = true;
470       tctx->emit_instruction(tctx, &new_inst);
471 
472       /* LG2 tmpA.y, tmpA.y */
473       new_inst = tgsi_default_full_instruction();
474       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
475       new_inst.Instruction.NumDstRegs = 1;
476       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
477       new_inst.Instruction.NumSrcRegs = 1;
478       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
479       tctx->emit_instruction(tctx, &new_inst);
480 
481       /* MUL tmpA.y, tmpA.z, tmpA.y */
482       new_inst = tgsi_default_full_instruction();
483       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
484       new_inst.Instruction.NumDstRegs = 1;
485       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
486       new_inst.Instruction.NumSrcRegs = 2;
487       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
488       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
489       tctx->emit_instruction(tctx, &new_inst);
490 
491       /* EX2 tmpA.y, tmpA.y */
492       new_inst = tgsi_default_full_instruction();
493       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
494       new_inst.Instruction.NumDstRegs = 1;
495       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
496       new_inst.Instruction.NumSrcRegs = 1;
497       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
498       tctx->emit_instruction(tctx, &new_inst);
499 
500       /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
501       new_inst = tgsi_default_full_instruction();
502       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
503       new_inst.Instruction.NumDstRegs = 1;
504       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
505       new_inst.Instruction.NumSrcRegs = 3;
506       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
507       new_inst.Src[0].Register.Negate = true;
508       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
509       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
510       tctx->emit_instruction(tctx, &new_inst);
511 
512       /* MOV dst.yz, tmpA.xy */
513       new_inst = tgsi_default_full_instruction();
514       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
515       new_inst.Instruction.NumDstRegs = 1;
516       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
517       new_inst.Instruction.NumSrcRegs = 1;
518       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
519       tctx->emit_instruction(tctx, &new_inst);
520    }
521 
522    if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
523       /* MOV dst.xw, imm{1.0} */
524       new_inst = tgsi_default_full_instruction();
525       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
526       new_inst.Instruction.NumDstRegs = 1;
527       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
528       new_inst.Instruction.NumSrcRegs = 1;
529       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
530       tctx->emit_instruction(tctx, &new_inst);
531    }
532 }
533 
534 /* EXP - Approximate Exponential Base 2
535  *  dst.x = 2^{\lfloor src.x\rfloor}
536  *  dst.y = src.x - \lfloor src.x\rfloor
537  *  dst.z = 2^{src.x}
538  *  dst.w = 1.0
539  *
540  * ; needs: 1 tmp, imm{1.0}
541  * if (lowering FLR) {
542  *   FRC tmpA.x, src.x
543  *   SUB tmpA.x, src.x, tmpA.x
544  * } else {
545  *   FLR tmpA.x, src.x
546  * }
547  * EX2 tmpA.y, src.x
548  * SUB dst.y, src.x, tmpA.x
549  * EX2 dst.x, tmpA.x
550  * MOV dst.z, tmpA.y
551  * MOV dst.w, imm{1.0}
552  */
553 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
554 		NINST(1)+ NINST(1) - OINST(1))
555 #define EXP_TMP  1
556 static void
transform_exp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)557 transform_exp(struct tgsi_transform_context *tctx,
558               struct tgsi_full_instruction *inst)
559 {
560    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
561    struct tgsi_full_dst_register *dst = &inst->Dst[0];
562    struct tgsi_full_src_register *src = &inst->Src[0];
563    struct tgsi_full_instruction new_inst;
564 
565    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
566       if (ctx->config->lower_FLR) {
567          /* FRC tmpA.x, src.x */
568          new_inst = tgsi_default_full_instruction();
569          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
570          new_inst.Instruction.NumDstRegs = 1;
571          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
572          new_inst.Instruction.NumSrcRegs = 1;
573          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
574          tctx->emit_instruction(tctx, &new_inst);
575 
576          /* SUB tmpA.x, src.x, tmpA.x */
577          new_inst = tgsi_default_full_instruction();
578          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
579          new_inst.Instruction.NumDstRegs = 1;
580          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
581          new_inst.Instruction.NumSrcRegs = 2;
582          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
583          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
584          new_inst.Src[1].Register.Negate = 1;
585          tctx->emit_instruction(tctx, &new_inst);
586      } else {
587          /* FLR tmpA.x, src.x */
588          new_inst = tgsi_default_full_instruction();
589          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
590          new_inst.Instruction.NumDstRegs = 1;
591          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
592          new_inst.Instruction.NumSrcRegs = 1;
593          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
594          tctx->emit_instruction(tctx, &new_inst);
595       }
596    }
597 
598    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
599       /* EX2 tmpA.y, src.x */
600       new_inst = tgsi_default_full_instruction();
601       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
602       new_inst.Instruction.NumDstRegs = 1;
603       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
604       new_inst.Instruction.NumSrcRegs = 1;
605       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
606       tctx->emit_instruction(tctx, &new_inst);
607    }
608 
609    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
610       /* SUB dst.y, src.x, tmpA.x */
611       new_inst = tgsi_default_full_instruction();
612       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
613       new_inst.Instruction.NumDstRegs = 1;
614       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
615       new_inst.Instruction.NumSrcRegs = 2;
616       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
617       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
618       new_inst.Src[1].Register.Negate = 1;
619       tctx->emit_instruction(tctx, &new_inst);
620    }
621 
622    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
623       /* EX2 dst.x, tmpA.x */
624       new_inst = tgsi_default_full_instruction();
625       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
626       new_inst.Instruction.NumDstRegs = 1;
627       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
628       new_inst.Instruction.NumSrcRegs = 1;
629       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
630       tctx->emit_instruction(tctx, &new_inst);
631    }
632 
633    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
634       /* MOV dst.z, tmpA.y */
635       new_inst = tgsi_default_full_instruction();
636       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
637       new_inst.Instruction.NumDstRegs = 1;
638       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
639       new_inst.Instruction.NumSrcRegs = 1;
640       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
641       tctx->emit_instruction(tctx, &new_inst);
642    }
643 
644    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
645       /* MOV dst.w, imm{1.0} */
646       new_inst = tgsi_default_full_instruction();
647       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
648       new_inst.Instruction.NumDstRegs = 1;
649       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
650       new_inst.Instruction.NumSrcRegs = 1;
651       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
652       tctx->emit_instruction(tctx, &new_inst);
653    }
654 }
655 
656 /* LOG - Approximate Logarithm Base 2
657  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
658  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
659  *  dst.z = \log_2{|src.x|}
660  *  dst.w = 1.0
661  *
662  * ; needs: 1 tmp, imm{1.0}
663  * LG2 tmpA.x, |src.x|
664  * if (lowering FLR) {
665  *   FRC tmpA.y, tmpA.x
666  *   SUB tmpA.y, tmpA.x, tmpA.y
667  * } else {
668  *   FLR tmpA.y, tmpA.x
669  * }
670  * EX2 tmpA.z, tmpA.y
671  * RCP tmpA.z, tmpA.z
672  * MUL dst.y, |src.x|, tmpA.z
673  * MOV dst.xz, tmpA.yx
674  * MOV dst.w, imm{1.0}
675  */
676 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
677 		NINST(2) + NINST(1) + NINST(1) - OINST(1))
678 #define LOG_TMP  1
679 static void
transform_log(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)680 transform_log(struct tgsi_transform_context *tctx,
681               struct tgsi_full_instruction *inst)
682 {
683    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
684    struct tgsi_full_dst_register *dst = &inst->Dst[0];
685    struct tgsi_full_src_register *src = &inst->Src[0];
686    struct tgsi_full_instruction new_inst;
687 
688    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
689       /* LG2 tmpA.x, |src.x| */
690       new_inst = tgsi_default_full_instruction();
691       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
692       new_inst.Instruction.NumDstRegs = 1;
693       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
694       new_inst.Instruction.NumSrcRegs = 1;
695       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
696       new_inst.Src[0].Register.Absolute = true;
697       tctx->emit_instruction(tctx, &new_inst);
698    }
699 
700    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
701       if (ctx->config->lower_FLR) {
702          /* FRC tmpA.y, tmpA.x */
703          new_inst = tgsi_default_full_instruction();
704          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
705          new_inst.Instruction.NumDstRegs = 1;
706          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
707          new_inst.Instruction.NumSrcRegs = 1;
708          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
709          tctx->emit_instruction(tctx, &new_inst);
710 
711          /* SUB tmpA.y, tmpA.x, tmpA.y */
712          new_inst = tgsi_default_full_instruction();
713          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
714          new_inst.Instruction.NumDstRegs = 1;
715          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
716          new_inst.Instruction.NumSrcRegs = 2;
717          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
718          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
719          new_inst.Src[1].Register.Negate = 1;
720          tctx->emit_instruction(tctx, &new_inst);
721       } else {
722          /* FLR tmpA.y, tmpA.x */
723          new_inst = tgsi_default_full_instruction();
724          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
725          new_inst.Instruction.NumDstRegs = 1;
726          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
727          new_inst.Instruction.NumSrcRegs = 1;
728          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
729          tctx->emit_instruction(tctx, &new_inst);
730       }
731    }
732 
733    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
734       /* EX2 tmpA.z, tmpA.y */
735       new_inst = tgsi_default_full_instruction();
736       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
737       new_inst.Instruction.NumDstRegs = 1;
738       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
739       new_inst.Instruction.NumSrcRegs = 1;
740       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
741       tctx->emit_instruction(tctx, &new_inst);
742 
743       /* RCP tmpA.z, tmpA.z */
744       new_inst = tgsi_default_full_instruction();
745       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
746       new_inst.Instruction.NumDstRegs = 1;
747       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
748       new_inst.Instruction.NumSrcRegs = 1;
749       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
750       tctx->emit_instruction(tctx, &new_inst);
751 
752       /* MUL dst.y, |src.x|, tmpA.z */
753       new_inst = tgsi_default_full_instruction();
754       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
755       new_inst.Instruction.NumDstRegs = 1;
756       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
757       new_inst.Instruction.NumSrcRegs = 2;
758       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
759       new_inst.Src[0].Register.Absolute = true;
760       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
761       tctx->emit_instruction(tctx, &new_inst);
762    }
763 
764    if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
765       /* MOV dst.xz, tmpA.yx */
766       new_inst = tgsi_default_full_instruction();
767       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
768       new_inst.Instruction.NumDstRegs = 1;
769       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
770       new_inst.Instruction.NumSrcRegs = 1;
771       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
772       tctx->emit_instruction(tctx, &new_inst);
773    }
774 
775    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
776       /* MOV dst.w, imm{1.0} */
777       new_inst = tgsi_default_full_instruction();
778       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
779       new_inst.Instruction.NumDstRegs = 1;
780       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
781       new_inst.Instruction.NumSrcRegs = 1;
782       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
783       tctx->emit_instruction(tctx, &new_inst);
784    }
785 }
786 
/* DP4 - 4-component Dot Product
 *   dst = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 *
 * DP3 - 3-component Dot Product
 *   dst = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 *
 * DP2 - 2-component Dot Product
 *   dst = src0.x * src1.x + src0.y * src1.y
 *
 * NOTE: these are translated into a sequence of MUL/MAD(/ADD) scalar
 * operations, which is what you'd prefer for an ISA that is natively
 * scalar.  Probably a native vector ISA would at least already have
 * DP4/DP3 instructions, but perhaps there is room for an alternative
 * translation for DP2 using vector instructions.
801  *
802  * ; needs: 1 tmp
803  * MUL tmpA.x, src0.x, src1.x
804  * MAD tmpA.x, src0.y, src1.y, tmpA.x
805  * if (DP3 || DP4) {
806  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
807  *   if (DP4) {
808  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
809  *   }
810  * }
811  * ; fixup last instruction to replicate into dst
812  */
813 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
814 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
815 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
816 #define DOTP_TMP  1
817 static void
transform_dotp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)818 transform_dotp(struct tgsi_transform_context *tctx,
819                struct tgsi_full_instruction *inst)
820 {
821    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
822    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
823    struct tgsi_full_src_register *src0 = &inst->Src[0];
824    struct tgsi_full_src_register *src1 = &inst->Src[1];
825    struct tgsi_full_instruction new_inst;
826    unsigned opcode = inst->Instruction.Opcode;
827 
828    /* NOTE: any potential last instruction must replicate src on all
829     * components (since it could be re-written to write to final dst)
830     */
831 
832    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
833       /* MUL tmpA.x, src0.x, src1.x */
834       new_inst = tgsi_default_full_instruction();
835       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
836       new_inst.Instruction.NumDstRegs = 1;
837       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
838       new_inst.Instruction.NumSrcRegs = 2;
839       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
840       reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
841       tctx->emit_instruction(tctx, &new_inst);
842 
843       /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
844       new_inst = tgsi_default_full_instruction();
845       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
846       new_inst.Instruction.NumDstRegs = 1;
847       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
848       new_inst.Instruction.NumSrcRegs = 3;
849       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
850       reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
851       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
852 
853       if ((opcode == TGSI_OPCODE_DP3) ||
854           (opcode == TGSI_OPCODE_DP4)) {
855          tctx->emit_instruction(tctx, &new_inst);
856 
857          /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
858          new_inst = tgsi_default_full_instruction();
859          new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
860          new_inst.Instruction.NumDstRegs = 1;
861          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
862          new_inst.Instruction.NumSrcRegs = 3;
863          reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
864          reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
865          reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
866 
867          if (opcode == TGSI_OPCODE_DP4) {
868             tctx->emit_instruction(tctx, &new_inst);
869 
870             /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
871             new_inst = tgsi_default_full_instruction();
872             new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
873             new_inst.Instruction.NumDstRegs = 1;
874             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
875             new_inst.Instruction.NumSrcRegs = 3;
876             reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
877             reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
878             reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
879          }
880       }
881 
882       /* fixup last instruction to write to dst: */
883       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
884 
885       tctx->emit_instruction(tctx, &new_inst);
886    }
887 }
888 
889 /* FLR - floor, CEIL - ceil
890  * ; needs: 1 tmp
891  * if (CEIL) {
892  *   FRC tmpA, -src
893  *   ADD dst, src, tmpA
894  * } else {
895  *   FRC tmpA, src
896  *   SUB dst, src, tmpA
897  * }
898  */
899 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
900 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
901 #define FLR_TMP 1
902 #define CEIL_TMP 1
903 static void
transform_flr_ceil(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)904 transform_flr_ceil(struct tgsi_transform_context *tctx,
905                    struct tgsi_full_instruction *inst)
906 {
907    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
908    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
909    struct tgsi_full_src_register *src0 = &inst->Src[0];
910    struct tgsi_full_instruction new_inst;
911    unsigned opcode = inst->Instruction.Opcode;
912 
913    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
914       /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
915       new_inst = tgsi_default_full_instruction();
916       new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
917       new_inst.Instruction.NumDstRegs = 1;
918       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
919       new_inst.Instruction.NumSrcRegs = 1;
920       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
921 
922       if (opcode == TGSI_OPCODE_CEIL)
923          new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
924       tctx->emit_instruction(tctx, &new_inst);
925 
926       /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
927       new_inst = tgsi_default_full_instruction();
928       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
929       new_inst.Instruction.NumDstRegs = 1;
930       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
931       new_inst.Instruction.NumSrcRegs = 2;
932       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
933       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
934       if (opcode == TGSI_OPCODE_FLR)
935          new_inst.Src[1].Register.Negate = 1;
936       tctx->emit_instruction(tctx, &new_inst);
937    }
938 }
939 
940 /* TRUNC - truncate off fractional part
941  *  dst.x = trunc(src.x)
942  *  dst.y = trunc(src.y)
943  *  dst.z = trunc(src.z)
944  *  dst.w = trunc(src.w)
945  *
946  * ; needs: 1 tmp
947  * if (lower FLR) {
948  *   FRC tmpA, |src|
949  *   SUB tmpA, |src|, tmpA
950  * } else {
951  *   FLR tmpA, |src|
952  * }
953  * CMP dst, src, -tmpA, tmpA
954  */
955 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
956 #define TRUNC_TMP 1
957 static void
transform_trunc(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)958 transform_trunc(struct tgsi_transform_context *tctx,
959                 struct tgsi_full_instruction *inst)
960 {
961    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
962    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
963    struct tgsi_full_src_register *src0 = &inst->Src[0];
964    struct tgsi_full_instruction new_inst;
965 
966    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
967       if (ctx->config->lower_FLR) {
968          new_inst = tgsi_default_full_instruction();
969          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
970          new_inst.Instruction.NumDstRegs = 1;
971          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
972          new_inst.Instruction.NumSrcRegs = 1;
973          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
974          new_inst.Src[0].Register.Absolute = true;
975          new_inst.Src[0].Register.Negate = false;
976          tctx->emit_instruction(tctx, &new_inst);
977 
978          new_inst = tgsi_default_full_instruction();
979          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
980          new_inst.Instruction.NumDstRegs = 1;
981          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
982          new_inst.Instruction.NumSrcRegs = 2;
983          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
984          new_inst.Src[0].Register.Absolute = true;
985          new_inst.Src[0].Register.Negate = false;
986          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
987          new_inst.Src[1].Register.Negate = 1;
988          tctx->emit_instruction(tctx, &new_inst);
989       } else {
990          new_inst = tgsi_default_full_instruction();
991          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
992          new_inst.Instruction.NumDstRegs = 1;
993          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
994          new_inst.Instruction.NumSrcRegs = 1;
995          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
996          new_inst.Src[0].Register.Absolute = true;
997          new_inst.Src[0].Register.Negate = false;
998          tctx->emit_instruction(tctx, &new_inst);
999       }
1000 
1001       new_inst = tgsi_default_full_instruction();
1002       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1003       new_inst.Instruction.NumDstRegs = 1;
1004       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1005       new_inst.Instruction.NumSrcRegs = 3;
1006       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1007       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1008       new_inst.Src[1].Register.Negate = true;
1009       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1010       tctx->emit_instruction(tctx, &new_inst);
1011    }
1012 }
1013 
1014 /* Inserts a MOV_SAT for the needed components of tex coord.  Note that
1015  * in the case of TXP, the clamping must happen *after* projection, so
1016  * we need to lower TXP to TEX.
1017  *
1018  *   MOV tmpA, src0
1019  *   if (opc == TXP) {
1020  *     ; do perspective division manually before clamping:
 *     RCP tmpB.x, tmpA.w
 *     MUL tmpA.<pmask>, tmpA, tmpB.xxxx
1023  *     opc = TEX;
1024  *   }
1025  *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
1026  *   <opc> dst, tmpA, ...
1027  */
1028 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1029 #define SAMP_TMP  2
1030 static int
transform_samp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1031 transform_samp(struct tgsi_transform_context *tctx,
1032                struct tgsi_full_instruction *inst)
1033 {
1034    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1035    struct tgsi_full_src_register *coord = &inst->Src[0];
1036    struct tgsi_full_src_register *samp;
1037    struct tgsi_full_instruction new_inst;
1038    /* mask is clamped coords, pmask is all coords (for projection): */
1039    unsigned mask = 0, pmask = 0, smask;
1040    unsigned tex = inst->Texture.Texture;
1041    unsigned opcode = inst->Instruction.Opcode;
1042    bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1043 		   (ctx->config->lower_TXP & (1 << tex));
1044 
1045    if (opcode == TGSI_OPCODE_TXB2) {
1046       samp = &inst->Src[2];
1047    } else {
1048       samp = &inst->Src[1];
1049    }
1050 
1051    /* convert sampler # to bitmask to test: */
1052    smask = 1 << samp->Register.Index;
1053 
1054    /* check if we actually need to lower this one: */
1055    if (!(ctx->saturate & smask) && !lower_txp)
1056       return -1;
1057 
1058    /* figure out which coordinates need saturating:
1059     *   - RECT textures should not get saturated
1060     *   - array index coords should not get saturated
1061     */
1062    switch (tex) {
1063    case TGSI_TEXTURE_3D:
1064    case TGSI_TEXTURE_CUBE:
1065    case TGSI_TEXTURE_CUBE_ARRAY:
1066    case TGSI_TEXTURE_SHADOWCUBE:
1067    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1068       if (ctx->config->saturate_r & smask)
1069          mask |= TGSI_WRITEMASK_Z;
1070       pmask |= TGSI_WRITEMASK_Z;
1071       /* fallthrough */
1072 
1073    case TGSI_TEXTURE_2D:
1074    case TGSI_TEXTURE_2D_ARRAY:
1075    case TGSI_TEXTURE_SHADOW2D:
1076    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1077    case TGSI_TEXTURE_2D_MSAA:
1078    case TGSI_TEXTURE_2D_ARRAY_MSAA:
1079       if (ctx->config->saturate_t & smask)
1080          mask |= TGSI_WRITEMASK_Y;
1081       pmask |= TGSI_WRITEMASK_Y;
1082       /* fallthrough */
1083 
1084    case TGSI_TEXTURE_1D:
1085    case TGSI_TEXTURE_1D_ARRAY:
1086    case TGSI_TEXTURE_SHADOW1D:
1087    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1088       if (ctx->config->saturate_s & smask)
1089          mask |= TGSI_WRITEMASK_X;
1090       pmask |= TGSI_WRITEMASK_X;
1091       break;
1092 
1093    case TGSI_TEXTURE_RECT:
1094    case TGSI_TEXTURE_SHADOWRECT:
1095       /* we don't saturate, but in case of lower_txp we
1096        * still need to do the perspective divide:
1097        */
1098        pmask = TGSI_WRITEMASK_XY;
1099        break;
1100    }
1101 
1102    /* sanity check.. driver could be asking to saturate a non-
1103     * existent coordinate component:
1104     */
1105    if (!mask && !lower_txp)
1106       return -1;
1107 
1108    /* MOV tmpA, src0 */
1109    create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1110 
1111    /* This is a bit sad.. we need to clamp *after* the coords
1112     * are projected, which means lowering TXP to TEX and doing
1113     * the projection ourself.  But since I haven't figured out
1114     * how to make the lowering code deliver an electric shock
1115     * to anyone using GL_CLAMP, we must do this instead:
1116     */
1117    if (opcode == TGSI_OPCODE_TXP) {
1118       /* RCP tmpB.x tmpA.w */
1119       new_inst = tgsi_default_full_instruction();
1120       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1121       new_inst.Instruction.NumDstRegs = 1;
1122       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1123       new_inst.Instruction.NumSrcRegs = 1;
1124       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1125       tctx->emit_instruction(tctx, &new_inst);
1126 
1127       /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1128       new_inst = tgsi_default_full_instruction();
1129       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1130       new_inst.Instruction.NumDstRegs = 1;
1131       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1132       new_inst.Instruction.NumSrcRegs = 2;
1133       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1134       reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1135       tctx->emit_instruction(tctx, &new_inst);
1136 
1137       opcode = TGSI_OPCODE_TEX;
1138    }
1139 
1140    /* MOV_SAT tmpA.<mask>, tmpA */
1141    if (mask) {
1142       create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1143    }
1144 
1145    /* modify the texture samp instruction to take fixed up coord: */
1146    new_inst = *inst;
1147    new_inst.Instruction.Opcode = opcode;
1148    new_inst.Src[0] = ctx->tmp[A].src;
1149    tctx->emit_instruction(tctx, &new_inst);
1150 
1151    return 0;
1152 }
1153 
1154 /* Two-sided color emulation:
1155  * For each COLOR input, create a corresponding BCOLOR input, plus
1156  * CMP instruction to select front or back color based on FACE
1157  */
1158 #define TWOSIDE_GROW(n)  (                      \
1159       2 +         /* FACE */                    \
1160       ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1161       ((n) * 1) + /* TEMP[] */                  \
1162       ((n) * NINST(3))   /* CMP instr */        \
1163       )
1164 
1165 static void
emit_twoside(struct tgsi_transform_context * tctx)1166 emit_twoside(struct tgsi_transform_context *tctx)
1167 {
1168    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1169    struct tgsi_shader_info *info = ctx->info;
1170    struct tgsi_full_declaration decl;
1171    struct tgsi_full_instruction new_inst;
1172    unsigned inbase, tmpbase;
1173    int i;
1174 
1175    inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
1176    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1177 
1178    /* additional inputs for BCOLOR's */
1179    for (i = 0; i < ctx->two_side_colors; i++) {
1180       unsigned in_idx = ctx->two_side_idx[i];
1181       decl = tgsi_default_full_declaration();
1182       decl.Declaration.File = TGSI_FILE_INPUT;
1183       decl.Declaration.Semantic = true;
1184       decl.Range.First = decl.Range.Last = inbase + i;
1185       decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1186       decl.Semantic.Index = info->input_semantic_index[in_idx];
1187       decl.Declaration.Interpolate = true;
1188       decl.Interp.Interpolate = info->input_interpolate[in_idx];
1189       decl.Interp.Location = info->input_interpolate_loc[in_idx];
1190       decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1191       tctx->emit_declaration(tctx, &decl);
1192    }
1193 
1194    /* additional input for FACE */
1195    if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1196       decl = tgsi_default_full_declaration();
1197       decl.Declaration.File = TGSI_FILE_INPUT;
1198       decl.Declaration.Semantic = true;
1199       decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1200       decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1201       decl.Semantic.Index = 0;
1202       tctx->emit_declaration(tctx, &decl);
1203 
1204       ctx->face_idx = decl.Range.First;
1205    }
1206 
1207    /* additional temps for COLOR/BCOLOR selection: */
1208    for (i = 0; i < ctx->two_side_colors; i++) {
1209       decl = tgsi_default_full_declaration();
1210       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1211       decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1212       tctx->emit_declaration(tctx, &decl);
1213    }
1214 
1215    /* and finally additional instructions to select COLOR/BCOLOR: */
1216    for (i = 0; i < ctx->two_side_colors; i++) {
1217       new_inst = tgsi_default_full_instruction();
1218       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1219 
1220       new_inst.Instruction.NumDstRegs = 1;
1221       new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
1222       new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1223       new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1224 
1225       new_inst.Instruction.NumSrcRegs = 3;
1226       new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
1227       new_inst.Src[0].Register.Index = ctx->face_idx;
1228       new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1229       new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1230       new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1231       new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1232       new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
1233       new_inst.Src[1].Register.Index = inbase + i;
1234       new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1235       new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1236       new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1237       new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1238       new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
1239       new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1240       new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1241       new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1242       new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1243       new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1244 
1245       tctx->emit_instruction(tctx, &new_inst);
1246    }
1247 }
1248 
1249 static void
emit_decls(struct tgsi_transform_context * tctx)1250 emit_decls(struct tgsi_transform_context *tctx)
1251 {
1252    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1253    struct tgsi_shader_info *info = ctx->info;
1254    struct tgsi_full_declaration decl;
1255    struct tgsi_full_immediate immed;
1256    unsigned tmpbase;
1257    int i;
1258 
1259    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1260 
1261    ctx->color_base = tmpbase + ctx->numtmp;
1262 
1263    /* declare immediate: */
1264    immed = tgsi_default_full_immediate();
1265    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1266    immed.u[0].Float = 0.0;
1267    immed.u[1].Float = 1.0;
1268    immed.u[2].Float = 128.0;
1269    immed.u[3].Float = 0.0;
1270    tctx->emit_immediate(tctx, &immed);
1271 
1272    ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1273    ctx->imm.Register.Index = info->immediate_count;
1274    ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1275    ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1276    ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1277    ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1278 
1279    /* declare temp regs: */
1280    for (i = 0; i < ctx->numtmp; i++) {
1281       decl = tgsi_default_full_declaration();
1282       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1283       decl.Range.First = decl.Range.Last = tmpbase + i;
1284       tctx->emit_declaration(tctx, &decl);
1285 
1286       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
1287       ctx->tmp[i].src.Register.Index = tmpbase + i;
1288       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1289       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1290       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1291       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1292 
1293       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
1294       ctx->tmp[i].dst.Register.Index = tmpbase + i;
1295       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1296    }
1297 
1298    if (ctx->two_side_colors)
1299       emit_twoside(tctx);
1300 }
1301 
1302 static void
rename_color_inputs(struct tgsi_lowering_context * ctx,struct tgsi_full_instruction * inst)1303 rename_color_inputs(struct tgsi_lowering_context *ctx,
1304                     struct tgsi_full_instruction *inst)
1305 {
1306    unsigned i, j;
1307    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1308       struct tgsi_src_register *src = &inst->Src[i].Register;
1309       if (src->File == TGSI_FILE_INPUT) {
1310          for (j = 0; j < ctx->two_side_colors; j++) {
1311             if (src->Index == ctx->two_side_idx[j]) {
1312                src->File = TGSI_FILE_TEMPORARY;
1313                src->Index = ctx->color_base + j;
1314                break;
1315             }
1316          }
1317       }
1318    }
1319 
1320 }
1321 
1322 static void
transform_instr(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1323 transform_instr(struct tgsi_transform_context *tctx,
1324 		struct tgsi_full_instruction *inst)
1325 {
1326    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1327 
1328    if (!ctx->emitted_decls) {
1329       emit_decls(tctx);
1330       ctx->emitted_decls = 1;
1331    }
1332 
1333    /* if emulating two-sided-color, we need to re-write some
1334     * src registers:
1335     */
1336    if (ctx->two_side_colors)
1337       rename_color_inputs(ctx, inst);
1338 
1339    switch (inst->Instruction.Opcode) {
1340    case TGSI_OPCODE_DST:
1341       if (!ctx->config->lower_DST)
1342          goto skip;
1343       transform_dst(tctx, inst);
1344       break;
1345    case TGSI_OPCODE_LRP:
1346       if (!ctx->config->lower_LRP)
1347          goto skip;
1348       transform_lrp(tctx, inst);
1349       break;
1350    case TGSI_OPCODE_FRC:
1351       if (!ctx->config->lower_FRC)
1352          goto skip;
1353       transform_frc(tctx, inst);
1354       break;
1355    case TGSI_OPCODE_POW:
1356       if (!ctx->config->lower_POW)
1357          goto skip;
1358       transform_pow(tctx, inst);
1359       break;
1360    case TGSI_OPCODE_LIT:
1361       if (!ctx->config->lower_LIT)
1362          goto skip;
1363       transform_lit(tctx, inst);
1364       break;
1365    case TGSI_OPCODE_EXP:
1366       if (!ctx->config->lower_EXP)
1367          goto skip;
1368       transform_exp(tctx, inst);
1369       break;
1370    case TGSI_OPCODE_LOG:
1371       if (!ctx->config->lower_LOG)
1372          goto skip;
1373       transform_log(tctx, inst);
1374       break;
1375    case TGSI_OPCODE_DP4:
1376       if (!ctx->config->lower_DP4)
1377          goto skip;
1378       transform_dotp(tctx, inst);
1379       break;
1380    case TGSI_OPCODE_DP3:
1381       if (!ctx->config->lower_DP3)
1382          goto skip;
1383       transform_dotp(tctx, inst);
1384       break;
1385    case TGSI_OPCODE_DP2:
1386       if (!ctx->config->lower_DP2)
1387          goto skip;
1388       transform_dotp(tctx, inst);
1389       break;
1390    case TGSI_OPCODE_FLR:
1391       if (!ctx->config->lower_FLR)
1392          goto skip;
1393       transform_flr_ceil(tctx, inst);
1394       break;
1395    case TGSI_OPCODE_CEIL:
1396       if (!ctx->config->lower_CEIL)
1397          goto skip;
1398       transform_flr_ceil(tctx, inst);
1399       break;
1400    case TGSI_OPCODE_TRUNC:
1401       if (!ctx->config->lower_TRUNC)
1402          goto skip;
1403       transform_trunc(tctx, inst);
1404       break;
1405    case TGSI_OPCODE_TEX:
1406    case TGSI_OPCODE_TXP:
1407    case TGSI_OPCODE_TXB:
1408    case TGSI_OPCODE_TXB2:
1409    case TGSI_OPCODE_TXL:
1410       if (transform_samp(tctx, inst))
1411          goto skip;
1412       break;
1413    default:
1414    skip:
1415       tctx->emit_instruction(tctx, inst);
1416       break;
1417    }
1418 }
1419 
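/* Returns the new, lowered tokens (which the caller is required to
 * free()), or NULL if no lowering is required or if the new token
 * buffer could not be allocated.  In every case *info is filled in by
 * scanning: it describes the returned tokens when they are non-NULL
 * and the input tokens otherwise.
 */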
1424 const struct tgsi_token *
tgsi_transform_lowering(const struct tgsi_lowering_config * config,const struct tgsi_token * tokens,struct tgsi_shader_info * info)1425 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1426                         const struct tgsi_token *tokens,
1427                         struct tgsi_shader_info *info)
1428 {
1429    struct tgsi_lowering_context ctx;
1430    struct tgsi_token *newtoks;
1431    int newlen, numtmp;
1432 
1433    /* sanity check in case limit is ever increased: */
1434    STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1435 
1436    /* sanity check the lowering */
1437    assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1438    assert(!(config->lower_FRC && config->lower_TRUNC));
1439 
1440    memset(&ctx, 0, sizeof(ctx));
1441    ctx.base.transform_instruction = transform_instr;
1442    ctx.info = info;
1443    ctx.config = config;
1444 
1445    tgsi_scan_shader(tokens, info);
1446 
1447    /* if we are adding fragment shader support to emulate two-sided
1448     * color, then figure out the number of additional inputs we need
1449     * to create for BCOLOR's..
1450     */
1451    if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1452        config->color_two_side) {
1453       int i;
1454       ctx.face_idx = -1;
1455       for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1456          if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1457             ctx.two_side_idx[ctx.two_side_colors++] = i;
1458          if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1459             ctx.face_idx = i;
1460       }
1461    }
1462 
1463    ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1464 
1465 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1466    /* if there are no instructions to lower, then we are done: */
1467    if (!(OPCS(DST) ||
1468          OPCS(LRP) ||
1469          OPCS(FRC) ||
1470          OPCS(POW) ||
1471          OPCS(LIT) ||
1472          OPCS(EXP) ||
1473          OPCS(LOG) ||
1474          OPCS(DP4) ||
1475          OPCS(DP3) ||
1476          OPCS(DP2) ||
1477          OPCS(FLR) ||
1478          OPCS(CEIL) ||
1479          OPCS(TRUNC) ||
1480          OPCS(TXP) ||
1481          ctx.two_side_colors ||
1482          ctx.saturate))
1483       return NULL;
1484 
1485 #if 0  /* debug */
1486    _debug_printf("BEFORE:");
1487    tgsi_dump(tokens, 0);
1488 #endif
1489 
1490    numtmp = 0;
1491    newlen = tgsi_num_tokens(tokens);
1492    if (OPCS(DST)) {
1493       newlen += DST_GROW * OPCS(DST);
1494       numtmp = MAX2(numtmp, DST_TMP);
1495    }
1496    if (OPCS(LRP)) {
1497       newlen += LRP_GROW * OPCS(LRP);
1498       numtmp = MAX2(numtmp, LRP_TMP);
1499    }
1500    if (OPCS(FRC)) {
1501       newlen += FRC_GROW * OPCS(FRC);
1502       numtmp = MAX2(numtmp, FRC_TMP);
1503    }
1504    if (OPCS(POW)) {
1505       newlen += POW_GROW * OPCS(POW);
1506       numtmp = MAX2(numtmp, POW_TMP);
1507    }
1508    if (OPCS(LIT)) {
1509       newlen += LIT_GROW * OPCS(LIT);
1510       numtmp = MAX2(numtmp, LIT_TMP);
1511    }
1512    if (OPCS(EXP)) {
1513       newlen += EXP_GROW * OPCS(EXP);
1514       numtmp = MAX2(numtmp, EXP_TMP);
1515    }
1516    if (OPCS(LOG)) {
1517       newlen += LOG_GROW * OPCS(LOG);
1518       numtmp = MAX2(numtmp, LOG_TMP);
1519    }
1520    if (OPCS(DP4)) {
1521       newlen += DP4_GROW * OPCS(DP4);
1522       numtmp = MAX2(numtmp, DOTP_TMP);
1523    }
1524    if (OPCS(DP3)) {
1525       newlen += DP3_GROW * OPCS(DP3);
1526       numtmp = MAX2(numtmp, DOTP_TMP);
1527    }
1528    if (OPCS(DP2)) {
1529       newlen += DP2_GROW * OPCS(DP2);
1530       numtmp = MAX2(numtmp, DOTP_TMP);
1531    }
1532    if (OPCS(FLR)) {
1533       newlen += FLR_GROW * OPCS(FLR);
1534       numtmp = MAX2(numtmp, FLR_TMP);
1535    }
1536    if (OPCS(CEIL)) {
1537       newlen += CEIL_GROW * OPCS(CEIL);
1538       numtmp = MAX2(numtmp, CEIL_TMP);
1539    }
1540    if (OPCS(TRUNC)) {
1541       newlen += TRUNC_GROW * OPCS(TRUNC);
1542       numtmp = MAX2(numtmp, TRUNC_TMP);
1543    }
1544    if (ctx.saturate || config->lower_TXP) {
1545       int n = 0;
1546 
1547       if (ctx.saturate) {
1548          n = info->opcode_count[TGSI_OPCODE_TEX] +
1549             info->opcode_count[TGSI_OPCODE_TXP] +
1550             info->opcode_count[TGSI_OPCODE_TXB] +
1551             info->opcode_count[TGSI_OPCODE_TXB2] +
1552             info->opcode_count[TGSI_OPCODE_TXL];
1553       } else if (config->lower_TXP) {
1554           n = info->opcode_count[TGSI_OPCODE_TXP];
1555       }
1556 
1557       newlen += SAMP_GROW * n;
1558       numtmp = MAX2(numtmp, SAMP_TMP);
1559    }
1560 
1561    /* specifically don't include two_side_colors temps in the count: */
1562    ctx.numtmp = numtmp;
1563 
1564    if (ctx.two_side_colors) {
1565       newlen += TWOSIDE_GROW(ctx.two_side_colors);
1566       /* note: we permanently consume temp regs, re-writing references
1567        * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1568        * instruction that selects which varying to use):
1569        */
1570       numtmp += ctx.two_side_colors;
1571    }
1572 
1573    newlen += 2 * numtmp;
1574    newlen += 5;        /* immediate */
1575 
1576    newtoks = tgsi_alloc_tokens(newlen);
1577    if (!newtoks)
1578       return NULL;
1579 
1580    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1581 
1582    tgsi_scan_shader(newtoks, info);
1583 
1584 #if 0  /* debug */
1585    _debug_printf("AFTER:");
1586    tgsi_dump(newtoks, 0);
1587 #endif
1588 
1589    return newtoks;
1590 }
1591