1 /**************************************************************************
2  *
3  * Copyright 2007-2008 VMware, Inc.
4  * All Rights Reserved.
5  * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * TGSI interpreter/executor.
31  *
32  * Flow control information:
33  *
34  * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35  * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36  * care since a condition may be true for some quad components but false
37  * for other components.
38  *
39  * We basically execute all statements (even if they're in the part of
40  * an IF/ELSE clause that's "not taken") and use a special mask to
41  * control writing to destination registers.  This is the ExecMask.
42  * See store_dest().
43  *
 * The ExecMask is computed from three other masks (CondMask, LoopMask and
 * ContMask) which are controlled by the flow control instructions (namely:
 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47  *
48  *
49  * Authors:
50  *   Michal Krol
51  *   Brian Paul
52  */
53 
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/half_float.h"
62 #include "util/u_memory.h"
63 #include "util/u_math.h"
64 #include "util/rounding.h"
65 
66 
/* Compile-time debug switch.  Its users are not visible in this part of the
 * file (NOTE(review): presumably enables execution tracing -- confirm).
 */
#define DEBUG_EXECUTION 0


/* When non-zero, micro_exp2() and micro_lg2() call util_fast_exp2() /
 * util_fast_log2() instead of their libm-based code paths.
 */
#define FAST_MATH 0

/* Channel indices of the four elements of a quad, as used by the derivative
 * helpers (micro_ddx(), micro_ddy() and their _fine variants).
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3
76 
/**
 * One channel of 64-bit values, one value per quad element.
 *
 * All members overlay the same storage, so a value can be viewed as a
 * double, as a pair of unsigned words, or as an unsigned/signed 64-bit
 * integer.  The 64-bit comparison helpers in this file store their boolean
 * result in u[i][0] only.
 */
union tgsi_double_channel {
   double d[TGSI_QUAD_SIZE];
   unsigned u[TGSI_QUAD_SIZE][2];
   uint64_t u64[TGSI_QUAD_SIZE];
   int64_t i64[TGSI_QUAD_SIZE];
};
83 
/**
 * A pair of 64-bit channels named xy and zw.
 *
 * NOTE(review): presumably the two 64-bit components that fit into one
 * four-component 32-bit register -- confirm against the users of this type.
 */
struct tgsi_double_vector {
   union tgsi_double_channel xy;
   union tgsi_double_channel zw;
};
88 
89 static void
micro_abs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)90 micro_abs(union tgsi_exec_channel *dst,
91           const union tgsi_exec_channel *src)
92 {
93    dst->f[0] = fabsf(src->f[0]);
94    dst->f[1] = fabsf(src->f[1]);
95    dst->f[2] = fabsf(src->f[2]);
96    dst->f[3] = fabsf(src->f[3]);
97 }
98 
99 static void
micro_arl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)100 micro_arl(union tgsi_exec_channel *dst,
101           const union tgsi_exec_channel *src)
102 {
103    dst->i[0] = (int)floorf(src->f[0]);
104    dst->i[1] = (int)floorf(src->f[1]);
105    dst->i[2] = (int)floorf(src->f[2]);
106    dst->i[3] = (int)floorf(src->f[3]);
107 }
108 
109 static void
micro_arr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)110 micro_arr(union tgsi_exec_channel *dst,
111           const union tgsi_exec_channel *src)
112 {
113    dst->i[0] = (int)floorf(src->f[0] + 0.5f);
114    dst->i[1] = (int)floorf(src->f[1] + 0.5f);
115    dst->i[2] = (int)floorf(src->f[2] + 0.5f);
116    dst->i[3] = (int)floorf(src->f[3] + 0.5f);
117 }
118 
119 static void
micro_ceil(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)120 micro_ceil(union tgsi_exec_channel *dst,
121            const union tgsi_exec_channel *src)
122 {
123    dst->f[0] = ceilf(src->f[0]);
124    dst->f[1] = ceilf(src->f[1]);
125    dst->f[2] = ceilf(src->f[2]);
126    dst->f[3] = ceilf(src->f[3]);
127 }
128 
129 static void
micro_cmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)130 micro_cmp(union tgsi_exec_channel *dst,
131           const union tgsi_exec_channel *src0,
132           const union tgsi_exec_channel *src1,
133           const union tgsi_exec_channel *src2)
134 {
135    dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
136    dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
137    dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
138    dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
139 }
140 
141 static void
micro_cos(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)142 micro_cos(union tgsi_exec_channel *dst,
143           const union tgsi_exec_channel *src)
144 {
145    dst->f[0] = cosf(src->f[0]);
146    dst->f[1] = cosf(src->f[1]);
147    dst->f[2] = cosf(src->f[2]);
148    dst->f[3] = cosf(src->f[3]);
149 }
150 
151 static void
micro_d2f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)152 micro_d2f(union tgsi_exec_channel *dst,
153           const union tgsi_double_channel *src)
154 {
155    dst->f[0] = (float)src->d[0];
156    dst->f[1] = (float)src->d[1];
157    dst->f[2] = (float)src->d[2];
158    dst->f[3] = (float)src->d[3];
159 }
160 
161 static void
micro_d2i(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)162 micro_d2i(union tgsi_exec_channel *dst,
163           const union tgsi_double_channel *src)
164 {
165    dst->i[0] = (int)src->d[0];
166    dst->i[1] = (int)src->d[1];
167    dst->i[2] = (int)src->d[2];
168    dst->i[3] = (int)src->d[3];
169 }
170 
171 static void
micro_d2u(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)172 micro_d2u(union tgsi_exec_channel *dst,
173           const union tgsi_double_channel *src)
174 {
175    dst->u[0] = (unsigned)src->d[0];
176    dst->u[1] = (unsigned)src->d[1];
177    dst->u[2] = (unsigned)src->d[2];
178    dst->u[3] = (unsigned)src->d[3];
179 }
180 static void
micro_dabs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)181 micro_dabs(union tgsi_double_channel *dst,
182            const union tgsi_double_channel *src)
183 {
184    dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
185    dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
186    dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
187    dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
188 }
189 
190 static void
micro_dadd(union tgsi_double_channel * dst,const union tgsi_double_channel * src)191 micro_dadd(union tgsi_double_channel *dst,
192           const union tgsi_double_channel *src)
193 {
194    dst->d[0] = src[0].d[0] + src[1].d[0];
195    dst->d[1] = src[0].d[1] + src[1].d[1];
196    dst->d[2] = src[0].d[2] + src[1].d[2];
197    dst->d[3] = src[0].d[3] + src[1].d[3];
198 }
199 
200 static void
micro_ddiv(union tgsi_double_channel * dst,const union tgsi_double_channel * src)201 micro_ddiv(union tgsi_double_channel *dst,
202           const union tgsi_double_channel *src)
203 {
204    dst->d[0] = src[0].d[0] / src[1].d[0];
205    dst->d[1] = src[0].d[1] / src[1].d[1];
206    dst->d[2] = src[0].d[2] / src[1].d[2];
207    dst->d[3] = src[0].d[3] / src[1].d[3];
208 }
209 
210 static void
micro_ddx(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)211 micro_ddx(union tgsi_exec_channel *dst,
212           const union tgsi_exec_channel *src)
213 {
214    dst->f[0] =
215    dst->f[1] =
216    dst->f[2] =
217    dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
218 }
219 
220 static void
micro_ddx_fine(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)221 micro_ddx_fine(union tgsi_exec_channel *dst,
222           const union tgsi_exec_channel *src)
223 {
224    dst->f[0] =
225    dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];
226    dst->f[2] =
227    dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
228 }
229 
230 
231 static void
micro_ddy(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)232 micro_ddy(union tgsi_exec_channel *dst,
233           const union tgsi_exec_channel *src)
234 {
235    dst->f[0] =
236    dst->f[1] =
237    dst->f[2] =
238    dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
239 }
240 
241 static void
micro_ddy_fine(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)242 micro_ddy_fine(union tgsi_exec_channel *dst,
243           const union tgsi_exec_channel *src)
244 {
245    dst->f[0] =
246    dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
247    dst->f[1] =
248    dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];
249 }
250 
251 static void
micro_dmul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)252 micro_dmul(union tgsi_double_channel *dst,
253            const union tgsi_double_channel *src)
254 {
255    dst->d[0] = src[0].d[0] * src[1].d[0];
256    dst->d[1] = src[0].d[1] * src[1].d[1];
257    dst->d[2] = src[0].d[2] * src[1].d[2];
258    dst->d[3] = src[0].d[3] * src[1].d[3];
259 }
260 
261 static void
micro_dmax(union tgsi_double_channel * dst,const union tgsi_double_channel * src)262 micro_dmax(union tgsi_double_channel *dst,
263            const union tgsi_double_channel *src)
264 {
265    dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0];
266    dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1];
267    dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2];
268    dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3];
269 }
270 
271 static void
micro_dmin(union tgsi_double_channel * dst,const union tgsi_double_channel * src)272 micro_dmin(union tgsi_double_channel *dst,
273            const union tgsi_double_channel *src)
274 {
275    dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0];
276    dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1];
277    dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2];
278    dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3];
279 }
280 
281 static void
micro_dneg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)282 micro_dneg(union tgsi_double_channel *dst,
283            const union tgsi_double_channel *src)
284 {
285    dst->d[0] = -src->d[0];
286    dst->d[1] = -src->d[1];
287    dst->d[2] = -src->d[2];
288    dst->d[3] = -src->d[3];
289 }
290 
291 static void
micro_dslt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)292 micro_dslt(union tgsi_double_channel *dst,
293            const union tgsi_double_channel *src)
294 {
295    dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
296    dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
297    dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
298    dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
299 }
300 
301 static void
micro_dsne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)302 micro_dsne(union tgsi_double_channel *dst,
303            const union tgsi_double_channel *src)
304 {
305    dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
306    dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
307    dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
308    dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
309 }
310 
311 static void
micro_dsge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)312 micro_dsge(union tgsi_double_channel *dst,
313            const union tgsi_double_channel *src)
314 {
315    dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
316    dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
317    dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
318    dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
319 }
320 
321 static void
micro_dseq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)322 micro_dseq(union tgsi_double_channel *dst,
323            const union tgsi_double_channel *src)
324 {
325    dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
326    dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
327    dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
328    dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
329 }
330 
331 static void
micro_drcp(union tgsi_double_channel * dst,const union tgsi_double_channel * src)332 micro_drcp(union tgsi_double_channel *dst,
333            const union tgsi_double_channel *src)
334 {
335    dst->d[0] = 1.0 / src->d[0];
336    dst->d[1] = 1.0 / src->d[1];
337    dst->d[2] = 1.0 / src->d[2];
338    dst->d[3] = 1.0 / src->d[3];
339 }
340 
341 static void
micro_dsqrt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)342 micro_dsqrt(union tgsi_double_channel *dst,
343             const union tgsi_double_channel *src)
344 {
345    dst->d[0] = sqrt(src->d[0]);
346    dst->d[1] = sqrt(src->d[1]);
347    dst->d[2] = sqrt(src->d[2]);
348    dst->d[3] = sqrt(src->d[3]);
349 }
350 
351 static void
micro_drsq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)352 micro_drsq(union tgsi_double_channel *dst,
353           const union tgsi_double_channel *src)
354 {
355    dst->d[0] = 1.0 / sqrt(src->d[0]);
356    dst->d[1] = 1.0 / sqrt(src->d[1]);
357    dst->d[2] = 1.0 / sqrt(src->d[2]);
358    dst->d[3] = 1.0 / sqrt(src->d[3]);
359 }
360 
361 static void
micro_dmad(union tgsi_double_channel * dst,const union tgsi_double_channel * src)362 micro_dmad(union tgsi_double_channel *dst,
363            const union tgsi_double_channel *src)
364 {
365    dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
366    dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
367    dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
368    dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
369 }
370 
371 static void
micro_dfrac(union tgsi_double_channel * dst,const union tgsi_double_channel * src)372 micro_dfrac(union tgsi_double_channel *dst,
373             const union tgsi_double_channel *src)
374 {
375    dst->d[0] = src->d[0] - floor(src->d[0]);
376    dst->d[1] = src->d[1] - floor(src->d[1]);
377    dst->d[2] = src->d[2] - floor(src->d[2]);
378    dst->d[3] = src->d[3] - floor(src->d[3]);
379 }
380 
381 static void
micro_dflr(union tgsi_double_channel * dst,const union tgsi_double_channel * src)382 micro_dflr(union tgsi_double_channel *dst,
383            const union tgsi_double_channel *src)
384 {
385    dst->d[0] = floor(src->d[0]);
386    dst->d[1] = floor(src->d[1]);
387    dst->d[2] = floor(src->d[2]);
388    dst->d[3] = floor(src->d[3]);
389 }
390 
391 static void
micro_dldexp(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)392 micro_dldexp(union tgsi_double_channel *dst,
393              const union tgsi_double_channel *src0,
394              union tgsi_exec_channel *src1)
395 {
396    dst->d[0] = ldexp(src0->d[0], src1->i[0]);
397    dst->d[1] = ldexp(src0->d[1], src1->i[1]);
398    dst->d[2] = ldexp(src0->d[2], src1->i[2]);
399    dst->d[3] = ldexp(src0->d[3], src1->i[3]);
400 }
401 
402 static void
micro_dfracexp(union tgsi_double_channel * dst,union tgsi_exec_channel * dst_exp,const union tgsi_double_channel * src)403 micro_dfracexp(union tgsi_double_channel *dst,
404                union tgsi_exec_channel *dst_exp,
405                const union tgsi_double_channel *src)
406 {
407    dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
408    dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
409    dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
410    dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
411 }
412 
413 static void
micro_exp2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)414 micro_exp2(union tgsi_exec_channel *dst,
415            const union tgsi_exec_channel *src)
416 {
417 #if FAST_MATH
418    dst->f[0] = util_fast_exp2(src->f[0]);
419    dst->f[1] = util_fast_exp2(src->f[1]);
420    dst->f[2] = util_fast_exp2(src->f[2]);
421    dst->f[3] = util_fast_exp2(src->f[3]);
422 #else
423 #if DEBUG
424    /* Inf is okay for this instruction, so clamp it to silence assertions. */
425    uint i;
426    union tgsi_exec_channel clamped;
427 
428    for (i = 0; i < 4; i++) {
429       if (src->f[i] > 127.99999f) {
430          clamped.f[i] = 127.99999f;
431       } else if (src->f[i] < -126.99999f) {
432          clamped.f[i] = -126.99999f;
433       } else {
434          clamped.f[i] = src->f[i];
435       }
436    }
437    src = &clamped;
438 #endif /* DEBUG */
439 
440    dst->f[0] = powf(2.0f, src->f[0]);
441    dst->f[1] = powf(2.0f, src->f[1]);
442    dst->f[2] = powf(2.0f, src->f[2]);
443    dst->f[3] = powf(2.0f, src->f[3]);
444 #endif /* FAST_MATH */
445 }
446 
447 static void
micro_f2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)448 micro_f2d(union tgsi_double_channel *dst,
449           const union tgsi_exec_channel *src)
450 {
451    dst->d[0] = (double)src->f[0];
452    dst->d[1] = (double)src->f[1];
453    dst->d[2] = (double)src->f[2];
454    dst->d[3] = (double)src->f[3];
455 }
456 
457 static void
micro_flr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)458 micro_flr(union tgsi_exec_channel *dst,
459           const union tgsi_exec_channel *src)
460 {
461    dst->f[0] = floorf(src->f[0]);
462    dst->f[1] = floorf(src->f[1]);
463    dst->f[2] = floorf(src->f[2]);
464    dst->f[3] = floorf(src->f[3]);
465 }
466 
467 static void
micro_frc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)468 micro_frc(union tgsi_exec_channel *dst,
469           const union tgsi_exec_channel *src)
470 {
471    dst->f[0] = src->f[0] - floorf(src->f[0]);
472    dst->f[1] = src->f[1] - floorf(src->f[1]);
473    dst->f[2] = src->f[2] - floorf(src->f[2]);
474    dst->f[3] = src->f[3] - floorf(src->f[3]);
475 }
476 
477 static void
micro_i2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)478 micro_i2d(union tgsi_double_channel *dst,
479           const union tgsi_exec_channel *src)
480 {
481    dst->d[0] = (double)src->i[0];
482    dst->d[1] = (double)src->i[1];
483    dst->d[2] = (double)src->i[2];
484    dst->d[3] = (double)src->i[3];
485 }
486 
487 static void
micro_iabs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)488 micro_iabs(union tgsi_exec_channel *dst,
489            const union tgsi_exec_channel *src)
490 {
491    dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
492    dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
493    dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
494    dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
495 }
496 
497 static void
micro_ineg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)498 micro_ineg(union tgsi_exec_channel *dst,
499            const union tgsi_exec_channel *src)
500 {
501    dst->i[0] = -src->i[0];
502    dst->i[1] = -src->i[1];
503    dst->i[2] = -src->i[2];
504    dst->i[3] = -src->i[3];
505 }
506 
507 static void
micro_lg2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)508 micro_lg2(union tgsi_exec_channel *dst,
509           const union tgsi_exec_channel *src)
510 {
511 #if FAST_MATH
512    dst->f[0] = util_fast_log2(src->f[0]);
513    dst->f[1] = util_fast_log2(src->f[1]);
514    dst->f[2] = util_fast_log2(src->f[2]);
515    dst->f[3] = util_fast_log2(src->f[3]);
516 #else
517    dst->f[0] = logf(src->f[0]) * 1.442695f;
518    dst->f[1] = logf(src->f[1]) * 1.442695f;
519    dst->f[2] = logf(src->f[2]) * 1.442695f;
520    dst->f[3] = logf(src->f[3]) * 1.442695f;
521 #endif
522 }
523 
524 static void
micro_lrp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)525 micro_lrp(union tgsi_exec_channel *dst,
526           const union tgsi_exec_channel *src0,
527           const union tgsi_exec_channel *src1,
528           const union tgsi_exec_channel *src2)
529 {
530    dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
531    dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
532    dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
533    dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
534 }
535 
536 static void
micro_mad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)537 micro_mad(union tgsi_exec_channel *dst,
538           const union tgsi_exec_channel *src0,
539           const union tgsi_exec_channel *src1,
540           const union tgsi_exec_channel *src2)
541 {
542    dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
543    dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
544    dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
545    dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
546 }
547 
548 static void
micro_mov(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)549 micro_mov(union tgsi_exec_channel *dst,
550           const union tgsi_exec_channel *src)
551 {
552    dst->u[0] = src->u[0];
553    dst->u[1] = src->u[1];
554    dst->u[2] = src->u[2];
555    dst->u[3] = src->u[3];
556 }
557 
558 static void
micro_rcp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)559 micro_rcp(union tgsi_exec_channel *dst,
560           const union tgsi_exec_channel *src)
561 {
562 #if 0 /* for debugging */
563    assert(src->f[0] != 0.0f);
564    assert(src->f[1] != 0.0f);
565    assert(src->f[2] != 0.0f);
566    assert(src->f[3] != 0.0f);
567 #endif
568    dst->f[0] = 1.0f / src->f[0];
569    dst->f[1] = 1.0f / src->f[1];
570    dst->f[2] = 1.0f / src->f[2];
571    dst->f[3] = 1.0f / src->f[3];
572 }
573 
574 static void
micro_rnd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)575 micro_rnd(union tgsi_exec_channel *dst,
576           const union tgsi_exec_channel *src)
577 {
578    dst->f[0] = _mesa_roundevenf(src->f[0]);
579    dst->f[1] = _mesa_roundevenf(src->f[1]);
580    dst->f[2] = _mesa_roundevenf(src->f[2]);
581    dst->f[3] = _mesa_roundevenf(src->f[3]);
582 }
583 
584 static void
micro_rsq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)585 micro_rsq(union tgsi_exec_channel *dst,
586           const union tgsi_exec_channel *src)
587 {
588 #if 0 /* for debugging */
589    assert(src->f[0] != 0.0f);
590    assert(src->f[1] != 0.0f);
591    assert(src->f[2] != 0.0f);
592    assert(src->f[3] != 0.0f);
593 #endif
594    dst->f[0] = 1.0f / sqrtf(src->f[0]);
595    dst->f[1] = 1.0f / sqrtf(src->f[1]);
596    dst->f[2] = 1.0f / sqrtf(src->f[2]);
597    dst->f[3] = 1.0f / sqrtf(src->f[3]);
598 }
599 
600 static void
micro_sqrt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)601 micro_sqrt(union tgsi_exec_channel *dst,
602            const union tgsi_exec_channel *src)
603 {
604    dst->f[0] = sqrtf(src->f[0]);
605    dst->f[1] = sqrtf(src->f[1]);
606    dst->f[2] = sqrtf(src->f[2]);
607    dst->f[3] = sqrtf(src->f[3]);
608 }
609 
610 static void
micro_seq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)611 micro_seq(union tgsi_exec_channel *dst,
612           const union tgsi_exec_channel *src0,
613           const union tgsi_exec_channel *src1)
614 {
615    dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
616    dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
617    dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
618    dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
619 }
620 
621 static void
micro_sge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)622 micro_sge(union tgsi_exec_channel *dst,
623           const union tgsi_exec_channel *src0,
624           const union tgsi_exec_channel *src1)
625 {
626    dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
627    dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
628    dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
629    dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
630 }
631 
632 static void
micro_sgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)633 micro_sgn(union tgsi_exec_channel *dst,
634           const union tgsi_exec_channel *src)
635 {
636    dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
637    dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
638    dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
639    dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
640 }
641 
642 static void
micro_isgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)643 micro_isgn(union tgsi_exec_channel *dst,
644           const union tgsi_exec_channel *src)
645 {
646    dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
647    dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
648    dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
649    dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
650 }
651 
652 static void
micro_sgt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)653 micro_sgt(union tgsi_exec_channel *dst,
654           const union tgsi_exec_channel *src0,
655           const union tgsi_exec_channel *src1)
656 {
657    dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
658    dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
659    dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
660    dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
661 }
662 
663 static void
micro_sin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)664 micro_sin(union tgsi_exec_channel *dst,
665           const union tgsi_exec_channel *src)
666 {
667    dst->f[0] = sinf(src->f[0]);
668    dst->f[1] = sinf(src->f[1]);
669    dst->f[2] = sinf(src->f[2]);
670    dst->f[3] = sinf(src->f[3]);
671 }
672 
673 static void
micro_sle(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)674 micro_sle(union tgsi_exec_channel *dst,
675           const union tgsi_exec_channel *src0,
676           const union tgsi_exec_channel *src1)
677 {
678    dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
679    dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
680    dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
681    dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
682 }
683 
684 static void
micro_slt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)685 micro_slt(union tgsi_exec_channel *dst,
686           const union tgsi_exec_channel *src0,
687           const union tgsi_exec_channel *src1)
688 {
689    dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
690    dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
691    dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
692    dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
693 }
694 
695 static void
micro_sne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)696 micro_sne(union tgsi_exec_channel *dst,
697           const union tgsi_exec_channel *src0,
698           const union tgsi_exec_channel *src1)
699 {
700    dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
701    dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
702    dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
703    dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
704 }
705 
706 static void
micro_trunc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)707 micro_trunc(union tgsi_exec_channel *dst,
708             const union tgsi_exec_channel *src)
709 {
710    dst->f[0] = truncf(src->f[0]);
711    dst->f[1] = truncf(src->f[1]);
712    dst->f[2] = truncf(src->f[2]);
713    dst->f[3] = truncf(src->f[3]);
714 }
715 
716 static void
micro_u2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)717 micro_u2d(union tgsi_double_channel *dst,
718           const union tgsi_exec_channel *src)
719 {
720    dst->d[0] = (double)src->u[0];
721    dst->d[1] = (double)src->u[1];
722    dst->d[2] = (double)src->u[2];
723    dst->d[3] = (double)src->u[3];
724 }
725 
726 static void
micro_i64abs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)727 micro_i64abs(union tgsi_double_channel *dst,
728              const union tgsi_double_channel *src)
729 {
730    dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
731    dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
732    dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
733    dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
734 }
735 
736 static void
micro_i64sgn(union tgsi_double_channel * dst,const union tgsi_double_channel * src)737 micro_i64sgn(union tgsi_double_channel *dst,
738              const union tgsi_double_channel *src)
739 {
740    dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
741    dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
742    dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
743    dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
744 }
745 
746 static void
micro_i64neg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)747 micro_i64neg(union tgsi_double_channel *dst,
748              const union tgsi_double_channel *src)
749 {
750    dst->i64[0] = -src->i64[0];
751    dst->i64[1] = -src->i64[1];
752    dst->i64[2] = -src->i64[2];
753    dst->i64[3] = -src->i64[3];
754 }
755 
756 static void
micro_u64seq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)757 micro_u64seq(union tgsi_double_channel *dst,
758            const union tgsi_double_channel *src)
759 {
760    dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
761    dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
762    dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
763    dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
764 }
765 
766 static void
micro_u64sne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)767 micro_u64sne(union tgsi_double_channel *dst,
768              const union tgsi_double_channel *src)
769 {
770    dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
771    dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
772    dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
773    dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
774 }
775 
776 static void
micro_i64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)777 micro_i64slt(union tgsi_double_channel *dst,
778              const union tgsi_double_channel *src)
779 {
780    dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
781    dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
782    dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
783    dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
784 }
785 
786 static void
micro_u64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)787 micro_u64slt(union tgsi_double_channel *dst,
788              const union tgsi_double_channel *src)
789 {
790    dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
791    dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
792    dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
793    dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
794 }
795 
796 static void
micro_i64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)797 micro_i64sge(union tgsi_double_channel *dst,
798            const union tgsi_double_channel *src)
799 {
800    dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
801    dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
802    dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
803    dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
804 }
805 
806 static void
micro_u64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)807 micro_u64sge(union tgsi_double_channel *dst,
808              const union tgsi_double_channel *src)
809 {
810    dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
811    dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
812    dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
813    dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
814 }
815 
816 static void
micro_u64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)817 micro_u64max(union tgsi_double_channel *dst,
818              const union tgsi_double_channel *src)
819 {
820    dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
821    dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
822    dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
823    dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
824 }
825 
826 static void
micro_i64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)827 micro_i64max(union tgsi_double_channel *dst,
828              const union tgsi_double_channel *src)
829 {
830    dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
831    dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
832    dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
833    dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
834 }
835 
836 static void
micro_u64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)837 micro_u64min(union tgsi_double_channel *dst,
838              const union tgsi_double_channel *src)
839 {
840    dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
841    dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
842    dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
843    dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
844 }
845 
846 static void
micro_i64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)847 micro_i64min(union tgsi_double_channel *dst,
848              const union tgsi_double_channel *src)
849 {
850    dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
851    dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
852    dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
853    dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
854 }
855 
856 static void
micro_u64add(union tgsi_double_channel * dst,const union tgsi_double_channel * src)857 micro_u64add(union tgsi_double_channel *dst,
858              const union tgsi_double_channel *src)
859 {
860    dst->u64[0] = src[0].u64[0] + src[1].u64[0];
861    dst->u64[1] = src[0].u64[1] + src[1].u64[1];
862    dst->u64[2] = src[0].u64[2] + src[1].u64[2];
863    dst->u64[3] = src[0].u64[3] + src[1].u64[3];
864 }
865 
866 static void
micro_u64mul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)867 micro_u64mul(union tgsi_double_channel *dst,
868              const union tgsi_double_channel *src)
869 {
870    dst->u64[0] = src[0].u64[0] * src[1].u64[0];
871    dst->u64[1] = src[0].u64[1] * src[1].u64[1];
872    dst->u64[2] = src[0].u64[2] * src[1].u64[2];
873    dst->u64[3] = src[0].u64[3] * src[1].u64[3];
874 }
875 
876 static void
micro_u64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)877 micro_u64div(union tgsi_double_channel *dst,
878              const union tgsi_double_channel *src)
879 {
880    dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
881    dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
882    dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
883    dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
884 }
885 
886 static void
micro_i64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)887 micro_i64div(union tgsi_double_channel *dst,
888              const union tgsi_double_channel *src)
889 {
890    dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
891    dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
892    dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
893    dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
894 }
895 
896 static void
micro_u64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)897 micro_u64mod(union tgsi_double_channel *dst,
898              const union tgsi_double_channel *src)
899 {
900    dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
901    dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
902    dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
903    dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
904 }
905 
906 static void
micro_i64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)907 micro_i64mod(union tgsi_double_channel *dst,
908              const union tgsi_double_channel *src)
909 {
910    dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
911    dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
912    dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
913    dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
914 }
915 
916 static void
micro_u64shl(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)917 micro_u64shl(union tgsi_double_channel *dst,
918              const union tgsi_double_channel *src0,
919              union tgsi_exec_channel *src1)
920 {
921    unsigned masked_count;
922    masked_count = src1->u[0] & 0x3f;
923    dst->u64[0] = src0->u64[0] << masked_count;
924    masked_count = src1->u[1] & 0x3f;
925    dst->u64[1] = src0->u64[1] << masked_count;
926    masked_count = src1->u[2] & 0x3f;
927    dst->u64[2] = src0->u64[2] << masked_count;
928    masked_count = src1->u[3] & 0x3f;
929    dst->u64[3] = src0->u64[3] << masked_count;
930 }
931 
932 static void
micro_i64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)933 micro_i64shr(union tgsi_double_channel *dst,
934              const union tgsi_double_channel *src0,
935              union tgsi_exec_channel *src1)
936 {
937    unsigned masked_count;
938    masked_count = src1->u[0] & 0x3f;
939    dst->i64[0] = src0->i64[0] >> masked_count;
940    masked_count = src1->u[1] & 0x3f;
941    dst->i64[1] = src0->i64[1] >> masked_count;
942    masked_count = src1->u[2] & 0x3f;
943    dst->i64[2] = src0->i64[2] >> masked_count;
944    masked_count = src1->u[3] & 0x3f;
945    dst->i64[3] = src0->i64[3] >> masked_count;
946 }
947 
948 static void
micro_u64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)949 micro_u64shr(union tgsi_double_channel *dst,
950              const union tgsi_double_channel *src0,
951              union tgsi_exec_channel *src1)
952 {
953    unsigned masked_count;
954    masked_count = src1->u[0] & 0x3f;
955    dst->u64[0] = src0->u64[0] >> masked_count;
956    masked_count = src1->u[1] & 0x3f;
957    dst->u64[1] = src0->u64[1] >> masked_count;
958    masked_count = src1->u[2] & 0x3f;
959    dst->u64[2] = src0->u64[2] >> masked_count;
960    masked_count = src1->u[3] & 0x3f;
961    dst->u64[3] = src0->u64[3] >> masked_count;
962 }
963 
/**
 * Tags naming the data types the executor distinguishes.
 *
 * NOTE(review): presumably used to select how operand channels are
 * interpreted when they are fetched or stored -- confirm against the
 * users of this enum, which are outside this part of the file.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT,
   TGSI_EXEC_DATA_INT,
   TGSI_EXEC_DATA_UINT,
   TGSI_EXEC_DATA_DOUBLE,
   TGSI_EXEC_DATA_INT64,
   TGSI_EXEC_DATA_UINT64,
};
972 
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 *
 * Each name below is a plain alias for the TGSI_EXEC_TEMP_* constant with
 * the same suffix; the values themselves are defined elsewhere.
 */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_PRIMITIVE_S1_I   TGSI_EXEC_TEMP_PRIMITIVE_S1_I
#define TEMP_PRIMITIVE_S1_C   TGSI_EXEC_TEMP_PRIMITIVE_S1_C
#define TEMP_PRIMITIVE_S2_I   TGSI_EXEC_TEMP_PRIMITIVE_S2_I
#define TEMP_PRIMITIVE_S2_C   TGSI_EXEC_TEMP_PRIMITIVE_S2_C
#define TEMP_PRIMITIVE_S3_I   TGSI_EXEC_TEMP_PRIMITIVE_S3_I
#define TEMP_PRIMITIVE_S3_C   TGSI_EXEC_TEMP_PRIMITIVE_S3_C
988 
/*
 * Lookup table pairing each TEMP_PRIMITIVE* register index (idx) with its
 * channel (chan).  Entry 0 is TEMP_PRIMITIVE; entries 1..3 are the _S1,
 * _S2 and _S3 variants, in that order.
 *
 * NOTE(review): presumably indexed by vertex stream number -- confirm
 * against the code that reads this table.
 */
static const struct {
   int idx;
   int chan;
} temp_prim_idxs[] = {
   { TEMP_PRIMITIVE_I, TEMP_PRIMITIVE_C },
   { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C },
   { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C },
   { TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C },
};
998 
/**
 * Recompute the execution mask of a machine.
 *
 * ExecMask is the bitwise AND of the conditional mask, the loop mask, the
 * continue mask, the switch mask and the function mask.
 *
 * MACH is a pointer expression to the machine.  It is parenthesized at
 * every use, and the whole expansion is parenthesized, so arguments such
 * as '&machine' expand correctly.  MACH is evaluated more than once, so it
 * must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->Switch.mask & \
                          (MACH)->FuncMask)
1002 
1003 
1004 static const union tgsi_exec_channel ZeroVec =
1005    { { 0.0, 0.0, 0.0, 0.0 } };
1006 
1007 static const union tgsi_exec_channel OneVec = {
1008    {1.0f, 1.0f, 1.0f, 1.0f}
1009 };
1010 
1011 static const union tgsi_exec_channel P128Vec = {
1012    {128.0f, 128.0f, 128.0f, 128.0f}
1013 };
1014 
1015 static const union tgsi_exec_channel M128Vec = {
1016    {-128.0f, -128.0f, -128.0f, -128.0f}
1017 };
1018 
1019 
1020 /**
1021  * Assert that none of the float values in 'chan' are infinite or NaN.
1022  * NaN and Inf may occur normally during program execution and should
1023  * not lead to crashes, etc.  But when debugging, it's helpful to catch
1024  * them.
1025  */
1026 static inline void
check_inf_or_nan(const union tgsi_exec_channel * chan)1027 check_inf_or_nan(const union tgsi_exec_channel *chan)
1028 {
1029    assert(!util_is_inf_or_nan((chan)->f[0]));
1030    assert(!util_is_inf_or_nan((chan)->f[1]));
1031    assert(!util_is_inf_or_nan((chan)->f[2]));
1032    assert(!util_is_inf_or_nan((chan)->f[3]));
1033 }
1034 
1035 
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of 'chan' on one line,
 * prefixed with 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
1044 
1045 
#ifdef DEBUG
/**
 * Debug helper: dump temporary register 'index' of 'mach', one line per
 * channel (X, Y, Z, W) with the four float lanes of that channel.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   const struct tgsi_exec_vector *tmp = &mach->Temps[index];
   int c = 0;

   debug_printf("Temp[%u] =\n", index);
   while (c < 4) {
      debug_printf("  %c: { %f, %f, %f, %f }\n",
                   "XYZW"[c],
                   tmp->xyzw[c].f[0],
                   tmp->xyzw[c].f[1],
                   tmp->xyzw[c].f[2],
                   tmp->xyzw[c].f[3]);
      c++;
   }
}
#endif
1063 
1064 
1065 void
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine * mach,unsigned num_bufs,const void ** bufs,const unsigned * buf_sizes)1066 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1067                                unsigned num_bufs,
1068                                const void **bufs,
1069                                const unsigned *buf_sizes)
1070 {
1071    unsigned i;
1072 
1073    for (i = 0; i < num_bufs; i++) {
1074       mach->Consts[i] = bufs[i];
1075       mach->ConstsSize[i] = buf_sizes[i];
1076    }
1077 }
1078 
1079 /**
1080  * Initialize machine state by expanding tokens to full instructions,
1081  * allocating temporary storage, setting up constants, etc.
1082  * After this, we can call tgsi_exec_machine_run() many times.
1083  */
1084 void
tgsi_exec_machine_bind_shader(struct tgsi_exec_machine * mach,const struct tgsi_token * tokens,struct tgsi_sampler * sampler,struct tgsi_image * image,struct tgsi_buffer * buffer)1085 tgsi_exec_machine_bind_shader(
1086    struct tgsi_exec_machine *mach,
1087    const struct tgsi_token *tokens,
1088    struct tgsi_sampler *sampler,
1089    struct tgsi_image *image,
1090    struct tgsi_buffer *buffer)
1091 {
1092    uint k;
1093    struct tgsi_parse_context parse;
1094    struct tgsi_full_instruction *instructions;
1095    struct tgsi_full_declaration *declarations;
1096    uint maxInstructions = 10, numInstructions = 0;
1097    uint maxDeclarations = 10, numDeclarations = 0;
1098 
1099 #if 0
1100    tgsi_dump(tokens, 0);
1101 #endif
1102 
1103    util_init_math();
1104 
1105 
1106    mach->Tokens = tokens;
1107    mach->Sampler = sampler;
1108    mach->Image = image;
1109    mach->Buffer = buffer;
1110 
1111    if (!tokens) {
1112       /* unbind and free all */
1113       FREE(mach->Declarations);
1114       mach->Declarations = NULL;
1115       mach->NumDeclarations = 0;
1116 
1117       FREE(mach->Instructions);
1118       mach->Instructions = NULL;
1119       mach->NumInstructions = 0;
1120 
1121       return;
1122    }
1123 
1124    k = tgsi_parse_init (&parse, mach->Tokens);
1125    if (k != TGSI_PARSE_OK) {
1126       debug_printf( "Problem parsing!\n" );
1127       return;
1128    }
1129 
1130    mach->ImmLimit = 0;
1131    mach->NumOutputs = 0;
1132 
1133    for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1134       mach->SysSemanticToIndex[k] = -1;
1135 
1136    if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1137        !mach->UsedGeometryShader) {
1138       struct tgsi_exec_vector *inputs;
1139       struct tgsi_exec_vector *outputs;
1140 
1141       inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1142                             TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1143                             16);
1144 
1145       if (!inputs)
1146          return;
1147 
1148       outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1149                              TGSI_MAX_TOTAL_VERTICES, 16);
1150 
1151       if (!outputs) {
1152          align_free(inputs);
1153          return;
1154       }
1155 
1156       align_free(mach->Inputs);
1157       align_free(mach->Outputs);
1158 
1159       mach->Inputs = inputs;
1160       mach->Outputs = outputs;
1161       mach->UsedGeometryShader = TRUE;
1162    }
1163 
1164    declarations = (struct tgsi_full_declaration *)
1165       MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1166 
1167    if (!declarations) {
1168       return;
1169    }
1170 
1171    instructions = (struct tgsi_full_instruction *)
1172       MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1173 
1174    if (!instructions) {
1175       FREE( declarations );
1176       return;
1177    }
1178 
1179    while( !tgsi_parse_end_of_tokens( &parse ) ) {
1180       uint i;
1181 
1182       tgsi_parse_token( &parse );
1183       switch( parse.FullToken.Token.Type ) {
1184       case TGSI_TOKEN_TYPE_DECLARATION:
1185          /* save expanded declaration */
1186          if (numDeclarations == maxDeclarations) {
1187             declarations = REALLOC(declarations,
1188                                    maxDeclarations
1189                                    * sizeof(struct tgsi_full_declaration),
1190                                    (maxDeclarations + 10)
1191                                    * sizeof(struct tgsi_full_declaration));
1192             maxDeclarations += 10;
1193          }
1194          if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)
1195             mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1);
1196          else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1197             const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1198             mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1199          }
1200 
1201          memcpy(declarations + numDeclarations,
1202                 &parse.FullToken.FullDeclaration,
1203                 sizeof(declarations[0]));
1204          numDeclarations++;
1205          break;
1206 
1207       case TGSI_TOKEN_TYPE_IMMEDIATE:
1208          {
1209             uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1210             assert( size <= 4 );
1211             if (mach->ImmLimit >= mach->ImmsReserved) {
1212                unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128;
1213                float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4));
1214                if (imms) {
1215                   mach->ImmsReserved = newReserved;
1216                   mach->Imms = imms;
1217                } else {
1218                   debug_printf("Unable to (re)allocate space for immidiate constants\n");
1219                   break;
1220                }
1221             }
1222 
1223             for( i = 0; i < size; i++ ) {
1224                mach->Imms[mach->ImmLimit][i] =
1225 		  parse.FullToken.FullImmediate.u[i].Float;
1226             }
1227             mach->ImmLimit += 1;
1228          }
1229          break;
1230 
1231       case TGSI_TOKEN_TYPE_INSTRUCTION:
1232 
1233          /* save expanded instruction */
1234          if (numInstructions == maxInstructions) {
1235             instructions = REALLOC(instructions,
1236                                    maxInstructions
1237                                    * sizeof(struct tgsi_full_instruction),
1238                                    (maxInstructions + 10)
1239                                    * sizeof(struct tgsi_full_instruction));
1240             maxInstructions += 10;
1241          }
1242 
1243          memcpy(instructions + numInstructions,
1244                 &parse.FullToken.FullInstruction,
1245                 sizeof(instructions[0]));
1246 
1247          numInstructions++;
1248          break;
1249 
1250       case TGSI_TOKEN_TYPE_PROPERTY:
1251          if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1252             if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1253                mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1254             }
1255          }
1256          break;
1257 
1258       default:
1259          assert( 0 );
1260       }
1261    }
1262    tgsi_parse_free (&parse);
1263 
1264    FREE(mach->Declarations);
1265    mach->Declarations = declarations;
1266    mach->NumDeclarations = numDeclarations;
1267 
1268    FREE(mach->Instructions);
1269    mach->Instructions = instructions;
1270    mach->NumInstructions = numInstructions;
1271 }
1272 
1273 
1274 struct tgsi_exec_machine *
tgsi_exec_machine_create(enum pipe_shader_type shader_type)1275 tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1276 {
1277    struct tgsi_exec_machine *mach;
1278 
1279    mach = align_malloc( sizeof *mach, 16 );
1280    if (!mach)
1281       goto fail;
1282 
1283    memset(mach, 0, sizeof(*mach));
1284 
1285    mach->ShaderType = shader_type;
1286    mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
1287    mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
1288 
1289    if (shader_type != PIPE_SHADER_COMPUTE) {
1290       mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1291       mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1292       if (!mach->Inputs || !mach->Outputs)
1293          goto fail;
1294    }
1295 
1296    if (shader_type == PIPE_SHADER_FRAGMENT) {
1297       mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16);
1298       if (!mach->InputSampleOffsetApply)
1299          goto fail;
1300    }
1301 
1302 #ifdef DEBUG
1303    /* silence warnings */
1304    (void) print_chan;
1305    (void) print_temp;
1306 #endif
1307 
1308    return mach;
1309 
1310 fail:
1311    if (mach) {
1312       align_free(mach->InputSampleOffsetApply);
1313       align_free(mach->Inputs);
1314       align_free(mach->Outputs);
1315       align_free(mach);
1316    }
1317    return NULL;
1318 }
1319 
1320 
1321 void
tgsi_exec_machine_destroy(struct tgsi_exec_machine * mach)1322 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1323 {
1324    if (mach) {
1325       FREE(mach->Instructions);
1326       FREE(mach->Declarations);
1327       FREE(mach->Imms);
1328 
1329       align_free(mach->InputSampleOffsetApply);
1330       align_free(mach->Inputs);
1331       align_free(mach->Outputs);
1332 
1333       align_free(mach);
1334    }
1335 }
1336 
1337 static void
micro_add(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1338 micro_add(union tgsi_exec_channel *dst,
1339           const union tgsi_exec_channel *src0,
1340           const union tgsi_exec_channel *src1)
1341 {
1342    dst->f[0] = src0->f[0] + src1->f[0];
1343    dst->f[1] = src0->f[1] + src1->f[1];
1344    dst->f[2] = src0->f[2] + src1->f[2];
1345    dst->f[3] = src0->f[3] + src1->f[3];
1346 }
1347 
1348 static void
micro_div(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1349 micro_div(
1350    union tgsi_exec_channel *dst,
1351    const union tgsi_exec_channel *src0,
1352    const union tgsi_exec_channel *src1 )
1353 {
1354    if (src1->f[0] != 0) {
1355       dst->f[0] = src0->f[0] / src1->f[0];
1356    }
1357    if (src1->f[1] != 0) {
1358       dst->f[1] = src0->f[1] / src1->f[1];
1359    }
1360    if (src1->f[2] != 0) {
1361       dst->f[2] = src0->f[2] / src1->f[2];
1362    }
1363    if (src1->f[3] != 0) {
1364       dst->f[3] = src0->f[3] / src1->f[3];
1365    }
1366 }
1367 
1368 static void
micro_lt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)1369 micro_lt(
1370    union tgsi_exec_channel *dst,
1371    const union tgsi_exec_channel *src0,
1372    const union tgsi_exec_channel *src1,
1373    const union tgsi_exec_channel *src2,
1374    const union tgsi_exec_channel *src3 )
1375 {
1376    dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1377    dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1378    dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1379    dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1380 }
1381 
1382 static void
micro_max(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1383 micro_max(union tgsi_exec_channel *dst,
1384           const union tgsi_exec_channel *src0,
1385           const union tgsi_exec_channel *src1)
1386 {
1387    dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
1388    dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
1389    dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
1390    dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
1391 }
1392 
1393 static void
micro_min(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1394 micro_min(union tgsi_exec_channel *dst,
1395           const union tgsi_exec_channel *src0,
1396           const union tgsi_exec_channel *src1)
1397 {
1398    dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
1399    dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
1400    dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
1401    dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
1402 }
1403 
1404 static void
micro_mul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1405 micro_mul(union tgsi_exec_channel *dst,
1406           const union tgsi_exec_channel *src0,
1407           const union tgsi_exec_channel *src1)
1408 {
1409    dst->f[0] = src0->f[0] * src1->f[0];
1410    dst->f[1] = src0->f[1] * src1->f[1];
1411    dst->f[2] = src0->f[2] * src1->f[2];
1412    dst->f[3] = src0->f[3] * src1->f[3];
1413 }
1414 
1415 static void
micro_neg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)1416 micro_neg(
1417    union tgsi_exec_channel *dst,
1418    const union tgsi_exec_channel *src )
1419 {
1420    dst->f[0] = -src->f[0];
1421    dst->f[1] = -src->f[1];
1422    dst->f[2] = -src->f[2];
1423    dst->f[3] = -src->f[3];
1424 }
1425 
1426 static void
micro_pow(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1427 micro_pow(
1428    union tgsi_exec_channel *dst,
1429    const union tgsi_exec_channel *src0,
1430    const union tgsi_exec_channel *src1 )
1431 {
1432 #if FAST_MATH
1433    dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1434    dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1435    dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1436    dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1437 #else
1438    dst->f[0] = powf( src0->f[0], src1->f[0] );
1439    dst->f[1] = powf( src0->f[1], src1->f[1] );
1440    dst->f[2] = powf( src0->f[2], src1->f[2] );
1441    dst->f[3] = powf( src0->f[3], src1->f[3] );
1442 #endif
1443 }
1444 
1445 static void
micro_ldexp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1446 micro_ldexp(union tgsi_exec_channel *dst,
1447             const union tgsi_exec_channel *src0,
1448             const union tgsi_exec_channel *src1)
1449 {
1450    dst->f[0] = ldexpf(src0->f[0], src1->i[0]);
1451    dst->f[1] = ldexpf(src0->f[1], src1->i[1]);
1452    dst->f[2] = ldexpf(src0->f[2], src1->i[2]);
1453    dst->f[3] = ldexpf(src0->f[3], src1->i[3]);
1454 }
1455 
1456 static void
micro_sub(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1457 micro_sub(union tgsi_exec_channel *dst,
1458           const union tgsi_exec_channel *src0,
1459           const union tgsi_exec_channel *src1)
1460 {
1461    dst->f[0] = src0->f[0] - src1->f[0];
1462    dst->f[1] = src0->f[1] - src1->f[1];
1463    dst->f[2] = src0->f[2] - src1->f[2];
1464    dst->f[3] = src0->f[3] - src1->f[3];
1465 }
1466 
1467 static void
fetch_src_file_channel(const struct tgsi_exec_machine * mach,const uint file,const uint swizzle,const union tgsi_exec_channel * index,const union tgsi_exec_channel * index2D,union tgsi_exec_channel * chan)1468 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1469                        const uint file,
1470                        const uint swizzle,
1471                        const union tgsi_exec_channel *index,
1472                        const union tgsi_exec_channel *index2D,
1473                        union tgsi_exec_channel *chan)
1474 {
1475    uint i;
1476 
1477    assert(swizzle < 4);
1478 
1479    switch (file) {
1480    case TGSI_FILE_CONSTANT:
1481       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1482          assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1483          assert(mach->Consts[index2D->i[i]]);
1484 
1485          if (index->i[i] < 0) {
1486             chan->u[i] = 0;
1487          } else {
1488             /* NOTE: copying the const value as a uint instead of float */
1489             const uint constbuf = index2D->i[i];
1490             const uint *buf = (const uint *)mach->Consts[constbuf];
1491             const int pos = index->i[i] * 4 + swizzle;
1492             /* const buffer bounds check */
1493             if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1494                if (0) {
1495                   /* Debug: print warning */
1496                   static int count = 0;
1497                   if (count++ < 100)
1498                      debug_printf("TGSI Exec: const buffer index %d"
1499                                   " out of bounds\n", pos);
1500                }
1501                chan->u[i] = 0;
1502             }
1503             else
1504                chan->u[i] = buf[pos];
1505          }
1506       }
1507       break;
1508 
1509    case TGSI_FILE_INPUT:
1510       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1511          /*
1512          if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1513             debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1514                          index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1515                          index2D->i[i], index->i[i]);
1516                          }*/
1517          int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1518          assert(pos >= 0);
1519          assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1520          chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1521       }
1522       break;
1523 
1524    case TGSI_FILE_SYSTEM_VALUE:
1525       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1526          chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1527       }
1528       break;
1529 
1530    case TGSI_FILE_TEMPORARY:
1531       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1532          assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1533          assert(index2D->i[i] == 0);
1534 
1535          chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1536       }
1537       break;
1538 
1539    case TGSI_FILE_IMMEDIATE:
1540       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1541          assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1542          assert(index2D->i[i] == 0);
1543 
1544          chan->f[i] = mach->Imms[index->i[i]][swizzle];
1545       }
1546       break;
1547 
1548    case TGSI_FILE_ADDRESS:
1549       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1550          assert(index->i[i] >= 0);
1551          assert(index2D->i[i] == 0);
1552 
1553          chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1554       }
1555       break;
1556 
1557    case TGSI_FILE_OUTPUT:
1558       /* vertex/fragment output vars can be read too */
1559       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1560          assert(index->i[i] >= 0);
1561          assert(index2D->i[i] == 0);
1562 
1563          chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1564       }
1565       break;
1566 
1567    default:
1568       assert(0);
1569       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1570          chan->u[i] = 0;
1571       }
1572    }
1573 }
1574 
1575 static void
get_index_registers(const struct tgsi_exec_machine * mach,const struct tgsi_full_src_register * reg,union tgsi_exec_channel * index,union tgsi_exec_channel * index2D)1576 get_index_registers(const struct tgsi_exec_machine *mach,
1577                     const struct tgsi_full_src_register *reg,
1578                     union tgsi_exec_channel *index,
1579                     union tgsi_exec_channel *index2D)
1580 {
1581    uint swizzle;
1582 
1583    /* We start with a direct index into a register file.
1584     *
1585     *    file[1],
1586     *    where:
1587     *       file = Register.File
1588     *       [1] = Register.Index
1589     */
1590    index->i[0] =
1591    index->i[1] =
1592    index->i[2] =
1593    index->i[3] = reg->Register.Index;
1594 
1595    /* There is an extra source register that indirectly subscripts
1596     * a register file. The direct index now becomes an offset
1597     * that is being added to the indirect register.
1598     *
1599     *    file[ind[2].x+1],
1600     *    where:
1601     *       ind = Indirect.File
1602     *       [2] = Indirect.Index
1603     *       .x = Indirect.SwizzleX
1604     */
1605    if (reg->Register.Indirect) {
1606       union tgsi_exec_channel index2;
1607       union tgsi_exec_channel indir_index;
1608       const uint execmask = mach->ExecMask;
1609       uint i;
1610 
1611       /* which address register (always zero now) */
1612       index2.i[0] =
1613       index2.i[1] =
1614       index2.i[2] =
1615       index2.i[3] = reg->Indirect.Index;
1616       /* get current value of address register[swizzle] */
1617       swizzle = reg->Indirect.Swizzle;
1618       fetch_src_file_channel(mach,
1619                              reg->Indirect.File,
1620                              swizzle,
1621                              &index2,
1622                              &ZeroVec,
1623                              &indir_index);
1624 
1625       /* add value of address register to the offset */
1626       index->i[0] += indir_index.i[0];
1627       index->i[1] += indir_index.i[1];
1628       index->i[2] += indir_index.i[2];
1629       index->i[3] += indir_index.i[3];
1630 
1631       /* for disabled execution channels, zero-out the index to
1632        * avoid using a potential garbage value.
1633        */
1634       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1635          if ((execmask & (1 << i)) == 0)
1636             index->i[i] = 0;
1637       }
1638    }
1639 
1640    /* There is an extra source register that is a second
1641     * subscript to a register file. Effectively it means that
1642     * the register file is actually a 2D array of registers.
1643     *
1644     *    file[3][1],
1645     *    where:
1646     *       [3] = Dimension.Index
1647     */
1648    if (reg->Register.Dimension) {
1649       index2D->i[0] =
1650       index2D->i[1] =
1651       index2D->i[2] =
1652       index2D->i[3] = reg->Dimension.Index;
1653 
1654       /* Again, the second subscript index can be addressed indirectly
1655        * identically to the first one.
1656        * Nothing stops us from indirectly addressing the indirect register,
1657        * but there is no need for that, so we won't exercise it.
1658        *
1659        *    file[ind[4].y+3][1],
1660        *    where:
1661        *       ind = DimIndirect.File
1662        *       [4] = DimIndirect.Index
1663        *       .y = DimIndirect.SwizzleX
1664        */
1665       if (reg->Dimension.Indirect) {
1666          union tgsi_exec_channel index2;
1667          union tgsi_exec_channel indir_index;
1668          const uint execmask = mach->ExecMask;
1669          uint i;
1670 
1671          index2.i[0] =
1672          index2.i[1] =
1673          index2.i[2] =
1674          index2.i[3] = reg->DimIndirect.Index;
1675 
1676          swizzle = reg->DimIndirect.Swizzle;
1677          fetch_src_file_channel(mach,
1678                                 reg->DimIndirect.File,
1679                                 swizzle,
1680                                 &index2,
1681                                 &ZeroVec,
1682                                 &indir_index);
1683 
1684          index2D->i[0] += indir_index.i[0];
1685          index2D->i[1] += indir_index.i[1];
1686          index2D->i[2] += indir_index.i[2];
1687          index2D->i[3] += indir_index.i[3];
1688 
1689          /* for disabled execution channels, zero-out the index to
1690           * avoid using a potential garbage value.
1691           */
1692          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1693             if ((execmask & (1 << i)) == 0) {
1694                index2D->i[i] = 0;
1695             }
1696          }
1697       }
1698 
1699       /* If by any chance there was a need for a 3D array of register
1700        * files, we would have to check whether Dimension is followed
1701        * by a dimension register and continue the saga.
1702        */
1703    } else {
1704       index2D->i[0] =
1705       index2D->i[1] =
1706       index2D->i[2] =
1707       index2D->i[3] = 0;
1708    }
1709 }
1710 
1711 
1712 static void
fetch_source_d(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index)1713 fetch_source_d(const struct tgsi_exec_machine *mach,
1714                union tgsi_exec_channel *chan,
1715                const struct tgsi_full_src_register *reg,
1716 	       const uint chan_index)
1717 {
1718    union tgsi_exec_channel index;
1719    union tgsi_exec_channel index2D;
1720    uint swizzle;
1721 
1722    get_index_registers(mach, reg, &index, &index2D);
1723 
1724 
1725    swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1726    fetch_src_file_channel(mach,
1727                           reg->Register.File,
1728                           swizzle,
1729                           &index,
1730                           &index2D,
1731                           chan);
1732 }
1733 
1734 static void
fetch_source(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index,enum tgsi_exec_datatype src_datatype)1735 fetch_source(const struct tgsi_exec_machine *mach,
1736              union tgsi_exec_channel *chan,
1737              const struct tgsi_full_src_register *reg,
1738              const uint chan_index,
1739              enum tgsi_exec_datatype src_datatype)
1740 {
1741    fetch_source_d(mach, chan, reg, chan_index);
1742 
1743    if (reg->Register.Absolute) {
1744       if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1745          micro_abs(chan, chan);
1746       } else {
1747          micro_iabs(chan, chan);
1748       }
1749    }
1750 
1751    if (reg->Register.Negate) {
1752       if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1753          micro_neg(chan, chan);
1754       } else {
1755          micro_ineg(chan, chan);
1756       }
1757    }
1758 }
1759 
1760 static union tgsi_exec_channel *
store_dest_dstret(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,uint chan_index,enum tgsi_exec_datatype dst_datatype)1761 store_dest_dstret(struct tgsi_exec_machine *mach,
1762                  const union tgsi_exec_channel *chan,
1763                  const struct tgsi_full_dst_register *reg,
1764                  uint chan_index,
1765                  enum tgsi_exec_datatype dst_datatype)
1766 {
1767    static union tgsi_exec_channel null;
1768    union tgsi_exec_channel *dst;
1769    union tgsi_exec_channel index2D;
1770    int offset = 0;  /* indirection offset */
1771    int index;
1772 
1773    /* for debugging */
1774    if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1775       check_inf_or_nan(chan);
1776    }
1777 
1778    /* There is an extra source register that indirectly subscripts
1779     * a register file. The direct index now becomes an offset
1780     * that is being added to the indirect register.
1781     *
1782     *    file[ind[2].x+1],
1783     *    where:
1784     *       ind = Indirect.File
1785     *       [2] = Indirect.Index
1786     *       .x = Indirect.SwizzleX
1787     */
1788    if (reg->Register.Indirect) {
1789       union tgsi_exec_channel index;
1790       union tgsi_exec_channel indir_index;
1791       uint swizzle;
1792 
1793       /* which address register (always zero for now) */
1794       index.i[0] =
1795       index.i[1] =
1796       index.i[2] =
1797       index.i[3] = reg->Indirect.Index;
1798 
1799       /* get current value of address register[swizzle] */
1800       swizzle = reg->Indirect.Swizzle;
1801 
1802       /* fetch values from the address/indirection register */
1803       fetch_src_file_channel(mach,
1804                              reg->Indirect.File,
1805                              swizzle,
1806                              &index,
1807                              &ZeroVec,
1808                              &indir_index);
1809 
1810       /* save indirection offset */
1811       offset = indir_index.i[0];
1812    }
1813 
1814    /* There is an extra source register that is a second
1815     * subscript to a register file. Effectively it means that
1816     * the register file is actually a 2D array of registers.
1817     *
1818     *    file[3][1],
1819     *    where:
1820     *       [3] = Dimension.Index
1821     */
1822    if (reg->Register.Dimension) {
1823       index2D.i[0] =
1824       index2D.i[1] =
1825       index2D.i[2] =
1826       index2D.i[3] = reg->Dimension.Index;
1827 
1828       /* Again, the second subscript index can be addressed indirectly
1829        * identically to the first one.
1830        * Nothing stops us from indirectly addressing the indirect register,
1831        * but there is no need for that, so we won't exercise it.
1832        *
1833        *    file[ind[4].y+3][1],
1834        *    where:
1835        *       ind = DimIndirect.File
1836        *       [4] = DimIndirect.Index
1837        *       .y = DimIndirect.SwizzleX
1838        */
1839       if (reg->Dimension.Indirect) {
1840          union tgsi_exec_channel index2;
1841          union tgsi_exec_channel indir_index;
1842          const uint execmask = mach->ExecMask;
1843          unsigned swizzle;
1844          uint i;
1845 
1846          index2.i[0] =
1847          index2.i[1] =
1848          index2.i[2] =
1849          index2.i[3] = reg->DimIndirect.Index;
1850 
1851          swizzle = reg->DimIndirect.Swizzle;
1852          fetch_src_file_channel(mach,
1853                                 reg->DimIndirect.File,
1854                                 swizzle,
1855                                 &index2,
1856                                 &ZeroVec,
1857                                 &indir_index);
1858 
1859          index2D.i[0] += indir_index.i[0];
1860          index2D.i[1] += indir_index.i[1];
1861          index2D.i[2] += indir_index.i[2];
1862          index2D.i[3] += indir_index.i[3];
1863 
1864          /* for disabled execution channels, zero-out the index to
1865           * avoid using a potential garbage value.
1866           */
1867          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1868             if ((execmask & (1 << i)) == 0) {
1869                index2D.i[i] = 0;
1870             }
1871          }
1872       }
1873 
1874       /* If by any chance there was a need for a 3D array of register
1875        * files, we would have to check whether Dimension is followed
1876        * by a dimension register and continue the saga.
1877        */
1878    } else {
1879       index2D.i[0] =
1880       index2D.i[1] =
1881       index2D.i[2] =
1882       index2D.i[3] = 0;
1883    }
1884 
1885    switch (reg->Register.File) {
1886    case TGSI_FILE_NULL:
1887       dst = &null;
1888       break;
1889 
1890    case TGSI_FILE_OUTPUT:
1891       index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1892          + reg->Register.Index;
1893       dst = &mach->Outputs[offset + index].xyzw[chan_index];
1894 #if 0
1895       debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1896                    mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1897                    reg->Register.Index);
1898       if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1899          debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1900          for (i = 0; i < TGSI_QUAD_SIZE; i++)
1901             if (execmask & (1 << i))
1902                debug_printf("%f, ", chan->f[i]);
1903          debug_printf(")\n");
1904       }
1905 #endif
1906       break;
1907 
1908    case TGSI_FILE_TEMPORARY:
1909       index = reg->Register.Index;
1910       assert( index < TGSI_EXEC_NUM_TEMPS );
1911       dst = &mach->Temps[offset + index].xyzw[chan_index];
1912       break;
1913 
1914    case TGSI_FILE_ADDRESS:
1915       index = reg->Register.Index;
1916       dst = &mach->Addrs[index].xyzw[chan_index];
1917       break;
1918 
1919    default:
1920       assert( 0 );
1921       return NULL;
1922    }
1923 
1924    return dst;
1925 }
1926 
1927 static void
store_dest_double(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,uint chan_index,enum tgsi_exec_datatype dst_datatype)1928 store_dest_double(struct tgsi_exec_machine *mach,
1929                  const union tgsi_exec_channel *chan,
1930                  const struct tgsi_full_dst_register *reg,
1931                  uint chan_index,
1932                  enum tgsi_exec_datatype dst_datatype)
1933 {
1934    union tgsi_exec_channel *dst;
1935    const uint execmask = mach->ExecMask;
1936    int i;
1937 
1938    dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
1939    if (!dst)
1940       return;
1941 
1942    /* doubles path */
1943    for (i = 0; i < TGSI_QUAD_SIZE; i++)
1944       if (execmask & (1 << i))
1945          dst->i[i] = chan->i[i];
1946 }
1947 
1948 static void
store_dest(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_index,enum tgsi_exec_datatype dst_datatype)1949 store_dest(struct tgsi_exec_machine *mach,
1950            const union tgsi_exec_channel *chan,
1951            const struct tgsi_full_dst_register *reg,
1952            const struct tgsi_full_instruction *inst,
1953            uint chan_index,
1954            enum tgsi_exec_datatype dst_datatype)
1955 {
1956    union tgsi_exec_channel *dst;
1957    const uint execmask = mach->ExecMask;
1958    int i;
1959 
1960    dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
1961    if (!dst)
1962       return;
1963 
1964    if (!inst->Instruction.Saturate) {
1965       for (i = 0; i < TGSI_QUAD_SIZE; i++)
1966          if (execmask & (1 << i))
1967             dst->i[i] = chan->i[i];
1968    }
1969    else {
1970       for (i = 0; i < TGSI_QUAD_SIZE; i++)
1971          if (execmask & (1 << i)) {
1972             if (chan->f[i] < 0.0f)
1973                dst->f[i] = 0.0f;
1974             else if (chan->f[i] > 1.0f)
1975                dst->f[i] = 1.0f;
1976             else
1977                dst->i[i] = chan->i[i];
1978          }
1979    }
1980 }
1981 
/* Fetch component CHAN of source operand INDEX of the current instruction
 * into VAL, applying the operand's modifiers with float semantics.
 * Expands to a fetch_source() call and expects `mach` and `inst` to be in
 * scope at the point of use.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

/* Integer counterpart of FETCH: identical, except that the modifiers are
 * applied with TGSI_EXEC_DATA_INT semantics.
 */
#define IFETCH(VAL,INDEX,CHAN)\
    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
1987 
1988 
1989 /**
1990  * Execute ARB-style KIL which is predicated by a src register.
1991  * Kill fragment if any of the four values is less than zero.
1992  */
1993 static void
exec_kill_if(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)1994 exec_kill_if(struct tgsi_exec_machine *mach,
1995              const struct tgsi_full_instruction *inst)
1996 {
1997    uint uniquemask;
1998    uint chan_index;
1999    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2000    union tgsi_exec_channel r[1];
2001 
2002    /* This mask stores component bits that were already tested. */
2003    uniquemask = 0;
2004 
2005    for (chan_index = 0; chan_index < 4; chan_index++)
2006    {
2007       uint swizzle;
2008       uint i;
2009 
2010       /* unswizzle channel */
2011       swizzle = tgsi_util_get_full_src_register_swizzle (
2012                         &inst->Src[0],
2013                         chan_index);
2014 
2015       /* check if the component has not been already tested */
2016       if (uniquemask & (1 << swizzle))
2017          continue;
2018       uniquemask |= 1 << swizzle;
2019 
2020       FETCH(&r[0], 0, chan_index);
2021       for (i = 0; i < 4; i++)
2022          if (r[0].f[i] < 0.0f)
2023             kilmask |= 1 << i;
2024    }
2025 
2026    /* restrict to fragments currently executing */
2027    kilmask &= mach->ExecMask;
2028 
2029    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2030 }
2031 
2032 /**
2033  * Unconditional fragment kill/discard.
2034  */
2035 static void
exec_kill(struct tgsi_exec_machine * mach)2036 exec_kill(struct tgsi_exec_machine *mach)
2037 {
2038    uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2039 
2040    /* kill fragment for all fragments currently executing */
2041    kilmask = mach->ExecMask;
2042    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2043 }
2044 
2045 static void
emit_vertex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2046 emit_vertex(struct tgsi_exec_machine *mach,
2047             const struct tgsi_full_instruction *inst)
2048 {
2049    union tgsi_exec_channel r[1];
2050    unsigned stream_id;
2051    unsigned *prim_count;
2052    /* FIXME: check for exec mask correctly
2053    unsigned i;
2054    for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2055          if ((mach->ExecMask & (1 << i)))
2056    */
2057    IFETCH(&r[0], 0, TGSI_CHAN_X);
2058    stream_id = r[0].u[0];
2059    prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
2060    if (mach->ExecMask) {
2061       if (mach->Primitives[stream_id][*prim_count] >= mach->MaxOutputVertices)
2062          return;
2063 
2064       if (mach->Primitives[stream_id][*prim_count] == 0)
2065          mach->PrimitiveOffsets[stream_id][*prim_count] = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0];
2066       mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
2067       mach->Primitives[stream_id][*prim_count]++;
2068    }
2069 }
2070 
2071 static void
emit_primitive(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2072 emit_primitive(struct tgsi_exec_machine *mach,
2073                const struct tgsi_full_instruction *inst)
2074 {
2075    unsigned *prim_count;
2076    union tgsi_exec_channel r[1];
2077    unsigned stream_id = 0;
2078    /* FIXME: check for exec mask correctly
2079    unsigned i;
2080    for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2081          if ((mach->ExecMask & (1 << i)))
2082    */
2083    if (inst) {
2084       IFETCH(&r[0], 0, TGSI_CHAN_X);
2085       stream_id = r[0].u[0];
2086    }
2087    prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
2088    if (mach->ExecMask) {
2089       ++(*prim_count);
2090       debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
2091       mach->Primitives[stream_id][*prim_count] = 0;
2092    }
2093 }
2094 
2095 static void
conditional_emit_primitive(struct tgsi_exec_machine * mach)2096 conditional_emit_primitive(struct tgsi_exec_machine *mach)
2097 {
2098    if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
2099       int emitted_verts =
2100          mach->Primitives[0][mach->Temps[temp_prim_idxs[0].idx].xyzw[temp_prim_idxs[0].chan].u[0]];
2101       if (emitted_verts) {
2102          emit_primitive(mach, NULL);
2103       }
2104    }
2105 }
2106 
2107 
2108 /*
2109  * Fetch four texture samples using STR texture coordinates.
2110  */
2111 static void
fetch_texel(struct tgsi_sampler * sampler,const unsigned sview_idx,const unsigned sampler_idx,const union tgsi_exec_channel * s,const union tgsi_exec_channel * t,const union tgsi_exec_channel * p,const union tgsi_exec_channel * c0,const union tgsi_exec_channel * c1,float derivs[3][2][TGSI_QUAD_SIZE],const int8_t offset[3],enum tgsi_sampler_control control,union tgsi_exec_channel * r,union tgsi_exec_channel * g,union tgsi_exec_channel * b,union tgsi_exec_channel * a)2112 fetch_texel( struct tgsi_sampler *sampler,
2113              const unsigned sview_idx,
2114              const unsigned sampler_idx,
2115              const union tgsi_exec_channel *s,
2116              const union tgsi_exec_channel *t,
2117              const union tgsi_exec_channel *p,
2118              const union tgsi_exec_channel *c0,
2119              const union tgsi_exec_channel *c1,
2120              float derivs[3][2][TGSI_QUAD_SIZE],
2121              const int8_t offset[3],
2122              enum tgsi_sampler_control control,
2123              union tgsi_exec_channel *r,
2124              union tgsi_exec_channel *g,
2125              union tgsi_exec_channel *b,
2126              union tgsi_exec_channel *a )
2127 {
2128    uint j;
2129    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2130 
2131    /* FIXME: handle explicit derivs, offsets */
2132    sampler->get_samples(sampler, sview_idx, sampler_idx,
2133                         s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
2134 
2135    for (j = 0; j < 4; j++) {
2136       r->f[j] = rgba[0][j];
2137       g->f[j] = rgba[1][j];
2138       b->f[j] = rgba[2][j];
2139       a->f[j] = rgba[3][j];
2140    }
2141 }
2142 
2143 
/* Texture instruction variants, passed as the `modifier` argument of
 * exec_tex().  As consumed there:
 *   NONE          - no extra modifier value is fetched
 *   PROJECTED     - coordinates (and shadow reference) are divided by the
 *                   fetched modifier value
 *   LOD_BIAS      - selects TGSI_SAMPLER_LOD_BIAS
 *   EXPLICIT_LOD  - selects TGSI_SAMPLER_LOD_EXPLICIT
 *   LEVEL_ZERO    - not accepted by exec_tex() (asserted against there)
 *   GATHER        - selects TGSI_SAMPLER_GATHER
 */
#define TEX_MODIFIER_NONE           0
#define TEX_MODIFIER_PROJECTED      1
#define TEX_MODIFIER_LOD_BIAS       2
#define TEX_MODIFIER_EXPLICIT_LOD   3
#define TEX_MODIFIER_LEVEL_ZERO     4
#define TEX_MODIFIER_GATHER         5
2150 
2151 /*
2152  * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2153  */
2154 static void
fetch_texel_offsets(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int8_t offsets[3])2155 fetch_texel_offsets(struct tgsi_exec_machine *mach,
2156                     const struct tgsi_full_instruction *inst,
2157                     int8_t offsets[3])
2158 {
2159    if (inst->Texture.NumOffsets == 1) {
2160       union tgsi_exec_channel index;
2161       union tgsi_exec_channel offset[3];
2162       index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2163       fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2164                              inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2165       fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2166                              inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2167       fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2168                              inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2169      offsets[0] = offset[0].i[0];
2170      offsets[1] = offset[1].i[0];
2171      offsets[2] = offset[2].i[0];
2172    } else {
2173      assert(inst->Texture.NumOffsets == 0);
2174      offsets[0] = offsets[1] = offsets[2] = 0;
2175    }
2176 }
2177 
2178 
2179 /*
2180  * Fetch dx and dy values for one channel (s, t or r).
2181  * Put dx values into one float array, dy values into another.
2182  */
2183 static void
fetch_assign_deriv_channel(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,unsigned regdsrcx,unsigned chan,float derivs[2][TGSI_QUAD_SIZE])2184 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2185                            const struct tgsi_full_instruction *inst,
2186                            unsigned regdsrcx,
2187                            unsigned chan,
2188                            float derivs[2][TGSI_QUAD_SIZE])
2189 {
2190    union tgsi_exec_channel d;
2191    FETCH(&d, regdsrcx, chan);
2192    derivs[0][0] = d.f[0];
2193    derivs[0][1] = d.f[1];
2194    derivs[0][2] = d.f[2];
2195    derivs[0][3] = d.f[3];
2196    FETCH(&d, regdsrcx + 1, chan);
2197    derivs[1][0] = d.f[0];
2198    derivs[1][1] = d.f[1];
2199    derivs[1][2] = d.f[2];
2200    derivs[1][3] = d.f[3];
2201 }
2202 
2203 static uint
fetch_sampler_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint sampler)2204 fetch_sampler_unit(struct tgsi_exec_machine *mach,
2205                    const struct tgsi_full_instruction *inst,
2206                    uint sampler)
2207 {
2208    uint unit = 0;
2209    int i;
2210    if (inst->Src[sampler].Register.Indirect) {
2211       const struct tgsi_full_src_register *reg = &inst->Src[sampler];
2212       union tgsi_exec_channel indir_index, index2;
2213       const uint execmask = mach->ExecMask;
2214       index2.i[0] =
2215       index2.i[1] =
2216       index2.i[2] =
2217       index2.i[3] = reg->Indirect.Index;
2218 
2219       fetch_src_file_channel(mach,
2220                              reg->Indirect.File,
2221                              reg->Indirect.Swizzle,
2222                              &index2,
2223                              &ZeroVec,
2224                              &indir_index);
2225       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2226          if (execmask & (1 << i)) {
2227             unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2228             break;
2229          }
2230       }
2231 
2232    } else {
2233       unit = inst->Src[sampler].Register.Index;
2234    }
2235    return unit;
2236 }
2237 
2238 /*
2239  * execute a texture instruction.
2240  *
2241  * modifier is used to control the channel routing for the
2242  * instruction variants like proj, lod, and texture with lod bias.
2243  * sampler indicates which src register the sampler is contained in.
2244  */
2245 static void
exec_tex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,uint sampler)2246 exec_tex(struct tgsi_exec_machine *mach,
2247          const struct tgsi_full_instruction *inst,
2248          uint modifier, uint sampler)
2249 {
2250    const union tgsi_exec_channel *args[5], *proj = NULL;
2251    union tgsi_exec_channel r[5];
2252    enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2253    uint chan;
2254    uint unit;
2255    int8_t offsets[3];
2256    int dim, shadow_ref, i;
2257 
2258    unit = fetch_sampler_unit(mach, inst, sampler);
2259    /* always fetch all 3 offsets, overkill but keeps code simple */
2260    fetch_texel_offsets(mach, inst, offsets);
2261 
2262    assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2263    assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2264 
2265    dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2266    shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2267 
2268    assert(dim <= 4);
2269    if (shadow_ref >= 0)
2270       assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args));
2271 
2272    /* fetch modifier to the last argument */
2273    if (modifier != TEX_MODIFIER_NONE) {
2274       const int last = ARRAY_SIZE(args) - 1;
2275 
2276       /* fetch modifier from src0.w or src1.x */
2277       if (sampler == 1) {
2278          assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2279          FETCH(&r[last], 0, TGSI_CHAN_W);
2280       }
2281       else {
2282          FETCH(&r[last], 1, TGSI_CHAN_X);
2283       }
2284 
2285       if (modifier != TEX_MODIFIER_PROJECTED) {
2286          args[last] = &r[last];
2287       }
2288       else {
2289          proj = &r[last];
2290          args[last] = &ZeroVec;
2291       }
2292 
2293       /* point unused arguments to zero vector */
2294       for (i = dim; i < last; i++)
2295          args[i] = &ZeroVec;
2296 
2297       if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2298          control = TGSI_SAMPLER_LOD_EXPLICIT;
2299       else if (modifier == TEX_MODIFIER_LOD_BIAS)
2300          control = TGSI_SAMPLER_LOD_BIAS;
2301       else if (modifier == TEX_MODIFIER_GATHER)
2302          control = TGSI_SAMPLER_GATHER;
2303    }
2304    else {
2305       for (i = dim; i < (int)ARRAY_SIZE(args); i++)
2306          args[i] = &ZeroVec;
2307    }
2308 
2309    /* fetch coordinates */
2310    for (i = 0; i < dim; i++) {
2311       FETCH(&r[i], 0, TGSI_CHAN_X + i);
2312 
2313       if (proj)
2314          micro_div(&r[i], &r[i], proj);
2315 
2316       args[i] = &r[i];
2317    }
2318 
2319    /* fetch reference value */
2320    if (shadow_ref >= 0) {
2321       FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2322 
2323       if (proj)
2324          micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2325 
2326       args[shadow_ref] = &r[shadow_ref];
2327    }
2328 
2329    fetch_texel(mach->Sampler, unit, unit,
2330          args[0], args[1], args[2], args[3], args[4],
2331          NULL, offsets, control,
2332          &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2333 
2334 #if 0
2335    debug_printf("fetch r: %g %g %g %g\n",
2336          r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2337    debug_printf("fetch g: %g %g %g %g\n",
2338          r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2339    debug_printf("fetch b: %g %g %g %g\n",
2340          r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2341    debug_printf("fetch a: %g %g %g %g\n",
2342          r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2343 #endif
2344 
2345    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2346       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2347          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2348       }
2349    }
2350 }
2351 
2352 static void
exec_lodq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2353 exec_lodq(struct tgsi_exec_machine *mach,
2354           const struct tgsi_full_instruction *inst)
2355 {
2356    uint resource_unit, sampler_unit;
2357    unsigned dim;
2358    unsigned i;
2359    union tgsi_exec_channel coords[4];
2360    const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2361    union tgsi_exec_channel r[2];
2362 
2363    resource_unit = fetch_sampler_unit(mach, inst, 1);
2364    if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2365       uint target = mach->SamplerViews[resource_unit].Resource;
2366       dim = tgsi_util_get_texture_coord_dim(target);
2367       sampler_unit = fetch_sampler_unit(mach, inst, 2);
2368    } else {
2369       dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2370       sampler_unit = resource_unit;
2371    }
2372    assert(dim <= ARRAY_SIZE(coords));
2373    /* fetch coordinates */
2374    for (i = 0; i < dim; i++) {
2375       FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2376       args[i] = &coords[i];
2377    }
2378    for (i = dim; i < ARRAY_SIZE(coords); i++) {
2379       args[i] = &ZeroVec;
2380    }
2381    mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
2382                             args[0]->f,
2383                             args[1]->f,
2384                             args[2]->f,
2385                             args[3]->f,
2386                             TGSI_SAMPLER_LOD_NONE,
2387                             r[0].f,
2388                             r[1].f);
2389 
2390    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2391       store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2392                  TGSI_EXEC_DATA_FLOAT);
2393    }
2394    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2395       store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2396                  TGSI_EXEC_DATA_FLOAT);
2397    }
2398    if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2399       unsigned char swizzles[4];
2400       unsigned chan;
2401       swizzles[0] = inst->Src[1].Register.SwizzleX;
2402       swizzles[1] = inst->Src[1].Register.SwizzleY;
2403       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2404       swizzles[3] = inst->Src[1].Register.SwizzleW;
2405 
2406       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2407          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2408             if (swizzles[chan] >= 2) {
2409                store_dest(mach, &ZeroVec,
2410                           &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2411             } else {
2412                store_dest(mach, &r[swizzles[chan]],
2413                           &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2414             }
2415          }
2416       }
2417    } else {
2418       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2419          store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2420                     TGSI_EXEC_DATA_FLOAT);
2421       }
2422       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2423          store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2424                     TGSI_EXEC_DATA_FLOAT);
2425       }
2426    }
2427 }
2428 
2429 static void
exec_txd(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2430 exec_txd(struct tgsi_exec_machine *mach,
2431          const struct tgsi_full_instruction *inst)
2432 {
2433    union tgsi_exec_channel r[4];
2434    float derivs[3][2][TGSI_QUAD_SIZE];
2435    uint chan;
2436    uint unit;
2437    int8_t offsets[3];
2438 
2439    unit = fetch_sampler_unit(mach, inst, 3);
2440    /* always fetch all 3 offsets, overkill but keeps code simple */
2441    fetch_texel_offsets(mach, inst, offsets);
2442 
2443    switch (inst->Texture.Texture) {
2444    case TGSI_TEXTURE_1D:
2445       FETCH(&r[0], 0, TGSI_CHAN_X);
2446 
2447       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2448 
2449       fetch_texel(mach->Sampler, unit, unit,
2450                   &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2451                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2452                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2453       break;
2454 
2455    case TGSI_TEXTURE_SHADOW1D:
2456    case TGSI_TEXTURE_1D_ARRAY:
2457    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2458       /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2459       FETCH(&r[0], 0, TGSI_CHAN_X);
2460       FETCH(&r[1], 0, TGSI_CHAN_Y);
2461       FETCH(&r[2], 0, TGSI_CHAN_Z);
2462 
2463       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2464 
2465       fetch_texel(mach->Sampler, unit, unit,
2466                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2467                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2468                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2469       break;
2470 
2471    case TGSI_TEXTURE_2D:
2472    case TGSI_TEXTURE_RECT:
2473       FETCH(&r[0], 0, TGSI_CHAN_X);
2474       FETCH(&r[1], 0, TGSI_CHAN_Y);
2475 
2476       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2477       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2478 
2479       fetch_texel(mach->Sampler, unit, unit,
2480                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2481                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2482                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2483       break;
2484 
2485 
2486    case TGSI_TEXTURE_SHADOW2D:
2487    case TGSI_TEXTURE_SHADOWRECT:
2488    case TGSI_TEXTURE_2D_ARRAY:
2489    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2490       /* only SHADOW2D_ARRAY actually needs W */
2491       FETCH(&r[0], 0, TGSI_CHAN_X);
2492       FETCH(&r[1], 0, TGSI_CHAN_Y);
2493       FETCH(&r[2], 0, TGSI_CHAN_Z);
2494       FETCH(&r[3], 0, TGSI_CHAN_W);
2495 
2496       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2497       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2498 
2499       fetch_texel(mach->Sampler, unit, unit,
2500                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
2501                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2502                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2503       break;
2504 
2505    case TGSI_TEXTURE_3D:
2506    case TGSI_TEXTURE_CUBE:
2507    case TGSI_TEXTURE_CUBE_ARRAY:
2508    case TGSI_TEXTURE_SHADOWCUBE:
2509       /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2510       FETCH(&r[0], 0, TGSI_CHAN_X);
2511       FETCH(&r[1], 0, TGSI_CHAN_Y);
2512       FETCH(&r[2], 0, TGSI_CHAN_Z);
2513       FETCH(&r[3], 0, TGSI_CHAN_W);
2514 
2515       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2516       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2517       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2518 
2519       fetch_texel(mach->Sampler, unit, unit,
2520                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
2521                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2522                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2523       break;
2524 
2525    default:
2526       assert(0);
2527    }
2528 
2529    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2530       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2531          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2532       }
2533    }
2534 }
2535 
2536 
2537 static void
exec_txf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2538 exec_txf(struct tgsi_exec_machine *mach,
2539          const struct tgsi_full_instruction *inst)
2540 {
2541    union tgsi_exec_channel r[4];
2542    uint chan;
2543    uint unit;
2544    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2545    int j;
2546    int8_t offsets[3];
2547    unsigned target;
2548 
2549    unit = fetch_sampler_unit(mach, inst, 1);
2550    /* always fetch all 3 offsets, overkill but keeps code simple */
2551    fetch_texel_offsets(mach, inst, offsets);
2552 
2553    IFETCH(&r[3], 0, TGSI_CHAN_W);
2554 
2555    if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2556        inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2557       target = mach->SamplerViews[unit].Resource;
2558    }
2559    else {
2560       target = inst->Texture.Texture;
2561    }
2562    switch(target) {
2563    case TGSI_TEXTURE_3D:
2564    case TGSI_TEXTURE_2D_ARRAY:
2565    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2566    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2567       IFETCH(&r[2], 0, TGSI_CHAN_Z);
2568       /* fallthrough */
2569    case TGSI_TEXTURE_2D:
2570    case TGSI_TEXTURE_RECT:
2571    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2572    case TGSI_TEXTURE_SHADOW2D:
2573    case TGSI_TEXTURE_SHADOWRECT:
2574    case TGSI_TEXTURE_1D_ARRAY:
2575    case TGSI_TEXTURE_2D_MSAA:
2576       IFETCH(&r[1], 0, TGSI_CHAN_Y);
2577       /* fallthrough */
2578    case TGSI_TEXTURE_BUFFER:
2579    case TGSI_TEXTURE_1D:
2580    case TGSI_TEXTURE_SHADOW1D:
2581       IFETCH(&r[0], 0, TGSI_CHAN_X);
2582       break;
2583    default:
2584       assert(0);
2585       break;
2586    }
2587 
2588    mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2589                             offsets, rgba);
2590 
2591    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2592       r[0].f[j] = rgba[0][j];
2593       r[1].f[j] = rgba[1][j];
2594       r[2].f[j] = rgba[2][j];
2595       r[3].f[j] = rgba[3][j];
2596    }
2597 
2598    if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2599        inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2600       unsigned char swizzles[4];
2601       swizzles[0] = inst->Src[1].Register.SwizzleX;
2602       swizzles[1] = inst->Src[1].Register.SwizzleY;
2603       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2604       swizzles[3] = inst->Src[1].Register.SwizzleW;
2605 
2606       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2607          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2608             store_dest(mach, &r[swizzles[chan]],
2609                        &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2610          }
2611       }
2612    }
2613    else {
2614       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2615          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2616             store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2617          }
2618       }
2619    }
2620 }
2621 
2622 static void
exec_txq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2623 exec_txq(struct tgsi_exec_machine *mach,
2624          const struct tgsi_full_instruction *inst)
2625 {
2626    int result[4];
2627    union tgsi_exec_channel r[4], src;
2628    uint chan;
2629    uint unit;
2630    int i,j;
2631 
2632    unit = fetch_sampler_unit(mach, inst, 1);
2633 
2634    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2635 
2636    /* XXX: This interface can't return per-pixel values */
2637    mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2638 
2639    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2640       for (j = 0; j < 4; j++) {
2641          r[j].i[i] = result[j];
2642       }
2643    }
2644 
2645    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2646       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2647          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2648                     TGSI_EXEC_DATA_INT);
2649       }
2650    }
2651 }
2652 
2653 static void
exec_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,boolean compare)2654 exec_sample(struct tgsi_exec_machine *mach,
2655             const struct tgsi_full_instruction *inst,
2656             uint modifier, boolean compare)
2657 {
2658    const uint resource_unit = inst->Src[1].Register.Index;
2659    const uint sampler_unit = inst->Src[2].Register.Index;
2660    union tgsi_exec_channel r[5], c1;
2661    const union tgsi_exec_channel *lod = &ZeroVec;
2662    enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2663    uint chan;
2664    unsigned char swizzles[4];
2665    int8_t offsets[3];
2666 
2667    /* always fetch all 3 offsets, overkill but keeps code simple */
2668    fetch_texel_offsets(mach, inst, offsets);
2669 
2670    assert(modifier != TEX_MODIFIER_PROJECTED);
2671 
2672    if (modifier != TEX_MODIFIER_NONE) {
2673       if (modifier == TEX_MODIFIER_LOD_BIAS) {
2674          FETCH(&c1, 3, TGSI_CHAN_X);
2675          lod = &c1;
2676          control = TGSI_SAMPLER_LOD_BIAS;
2677       }
2678       else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2679          FETCH(&c1, 3, TGSI_CHAN_X);
2680          lod = &c1;
2681          control = TGSI_SAMPLER_LOD_EXPLICIT;
2682       }
2683       else if (modifier == TEX_MODIFIER_GATHER) {
2684          control = TGSI_SAMPLER_GATHER;
2685       }
2686       else {
2687          assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2688          control = TGSI_SAMPLER_LOD_ZERO;
2689       }
2690    }
2691 
2692    FETCH(&r[0], 0, TGSI_CHAN_X);
2693 
2694    switch (mach->SamplerViews[resource_unit].Resource) {
2695    case TGSI_TEXTURE_1D:
2696       if (compare) {
2697          FETCH(&r[2], 3, TGSI_CHAN_X);
2698          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2699                      &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2700                      NULL, offsets, control,
2701                      &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2702       }
2703       else {
2704          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2705                      &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2706                      NULL, offsets, control,
2707                      &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2708       }
2709       break;
2710 
2711    case TGSI_TEXTURE_1D_ARRAY:
2712    case TGSI_TEXTURE_2D:
2713    case TGSI_TEXTURE_RECT:
2714       FETCH(&r[1], 0, TGSI_CHAN_Y);
2715       if (compare) {
2716          FETCH(&r[2], 3, TGSI_CHAN_X);
2717          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2718                      &r[0], &r[1], &r[2], &ZeroVec, lod,    /* S, T, P, C, LOD */
2719                      NULL, offsets, control,
2720                      &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2721       }
2722       else {
2723          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2724                      &r[0], &r[1], &ZeroVec, &ZeroVec, lod,    /* S, T, P, C, LOD */
2725                      NULL, offsets, control,
2726                      &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2727       }
2728       break;
2729 
2730    case TGSI_TEXTURE_2D_ARRAY:
2731    case TGSI_TEXTURE_3D:
2732    case TGSI_TEXTURE_CUBE:
2733       FETCH(&r[1], 0, TGSI_CHAN_Y);
2734       FETCH(&r[2], 0, TGSI_CHAN_Z);
2735       if(compare) {
2736          FETCH(&r[3], 3, TGSI_CHAN_X);
2737          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2738                      &r[0], &r[1], &r[2], &r[3], lod,
2739                      NULL, offsets, control,
2740                      &r[0], &r[1], &r[2], &r[3]);
2741       }
2742       else {
2743          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2744                      &r[0], &r[1], &r[2], &ZeroVec, lod,
2745                      NULL, offsets, control,
2746                      &r[0], &r[1], &r[2], &r[3]);
2747       }
2748       break;
2749 
2750    case TGSI_TEXTURE_CUBE_ARRAY:
2751       FETCH(&r[1], 0, TGSI_CHAN_Y);
2752       FETCH(&r[2], 0, TGSI_CHAN_Z);
2753       FETCH(&r[3], 0, TGSI_CHAN_W);
2754       if(compare) {
2755          FETCH(&r[4], 3, TGSI_CHAN_X);
2756          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2757                      &r[0], &r[1], &r[2], &r[3], &r[4],
2758                      NULL, offsets, control,
2759                      &r[0], &r[1], &r[2], &r[3]);
2760       }
2761       else {
2762          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2763                      &r[0], &r[1], &r[2], &r[3], lod,
2764                      NULL, offsets, control,
2765                      &r[0], &r[1], &r[2], &r[3]);
2766       }
2767       break;
2768 
2769 
2770    default:
2771       assert(0);
2772    }
2773 
2774    swizzles[0] = inst->Src[1].Register.SwizzleX;
2775    swizzles[1] = inst->Src[1].Register.SwizzleY;
2776    swizzles[2] = inst->Src[1].Register.SwizzleZ;
2777    swizzles[3] = inst->Src[1].Register.SwizzleW;
2778 
2779    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2780       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2781          store_dest(mach, &r[swizzles[chan]],
2782                     &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2783       }
2784    }
2785 }
2786 
2787 static void
exec_sample_d(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2788 exec_sample_d(struct tgsi_exec_machine *mach,
2789               const struct tgsi_full_instruction *inst)
2790 {
2791    const uint resource_unit = inst->Src[1].Register.Index;
2792    const uint sampler_unit = inst->Src[2].Register.Index;
2793    union tgsi_exec_channel r[4];
2794    float derivs[3][2][TGSI_QUAD_SIZE];
2795    uint chan;
2796    unsigned char swizzles[4];
2797    int8_t offsets[3];
2798 
2799    /* always fetch all 3 offsets, overkill but keeps code simple */
2800    fetch_texel_offsets(mach, inst, offsets);
2801 
2802    FETCH(&r[0], 0, TGSI_CHAN_X);
2803 
2804    switch (mach->SamplerViews[resource_unit].Resource) {
2805    case TGSI_TEXTURE_1D:
2806    case TGSI_TEXTURE_1D_ARRAY:
2807       /* only 1D array actually needs Y */
2808       FETCH(&r[1], 0, TGSI_CHAN_Y);
2809 
2810       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2811 
2812       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2813                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2814                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2815                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2816       break;
2817 
2818    case TGSI_TEXTURE_2D:
2819    case TGSI_TEXTURE_RECT:
2820    case TGSI_TEXTURE_2D_ARRAY:
2821       /* only 2D array actually needs Z */
2822       FETCH(&r[1], 0, TGSI_CHAN_Y);
2823       FETCH(&r[2], 0, TGSI_CHAN_Z);
2824 
2825       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2826       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2827 
2828       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2829                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
2830                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2831                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2832       break;
2833 
2834    case TGSI_TEXTURE_3D:
2835    case TGSI_TEXTURE_CUBE:
2836    case TGSI_TEXTURE_CUBE_ARRAY:
2837       /* only cube array actually needs W */
2838       FETCH(&r[1], 0, TGSI_CHAN_Y);
2839       FETCH(&r[2], 0, TGSI_CHAN_Z);
2840       FETCH(&r[3], 0, TGSI_CHAN_W);
2841 
2842       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2843       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2844       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2845 
2846       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2847                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2848                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2849                   &r[0], &r[1], &r[2], &r[3]);
2850       break;
2851 
2852    default:
2853       assert(0);
2854    }
2855 
2856    swizzles[0] = inst->Src[1].Register.SwizzleX;
2857    swizzles[1] = inst->Src[1].Register.SwizzleY;
2858    swizzles[2] = inst->Src[1].Register.SwizzleZ;
2859    swizzles[3] = inst->Src[1].Register.SwizzleW;
2860 
2861    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2862       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2863          store_dest(mach, &r[swizzles[chan]],
2864                     &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2865       }
2866    }
2867 }
2868 
2869 
2870 /**
2871  * Evaluate a constant-valued coefficient at the position of the
2872  * current quad.
2873  */
2874 static void
eval_constant_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2875 eval_constant_coef(
2876    struct tgsi_exec_machine *mach,
2877    unsigned attrib,
2878    unsigned chan )
2879 {
2880    unsigned i;
2881 
2882    for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2883       mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2884    }
2885 }
2886 
2887 static void
interp_constant_offset(UNUSED const struct tgsi_exec_machine * mach,UNUSED unsigned attrib,UNUSED unsigned chan,UNUSED float ofs_x,UNUSED float ofs_y,UNUSED union tgsi_exec_channel * out_chan)2888 interp_constant_offset(
2889       UNUSED const struct tgsi_exec_machine *mach,
2890       UNUSED unsigned attrib,
2891       UNUSED unsigned chan,
2892       UNUSED float ofs_x,
2893       UNUSED float ofs_y,
2894       UNUSED union tgsi_exec_channel *out_chan)
2895 {
2896 }
2897 
2898 /**
2899  * Evaluate a linear-valued coefficient at the position of the
2900  * current quad.
2901  */
2902 static void
interp_linear_offset(const struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan,float ofs_x,float ofs_y,union tgsi_exec_channel * out_chan)2903 interp_linear_offset(
2904       const struct tgsi_exec_machine *mach,
2905       unsigned attrib,
2906       unsigned chan,
2907       float ofs_x,
2908       float ofs_y,
2909       union tgsi_exec_channel *out_chan)
2910 {
2911    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2912    const float dady = mach->InterpCoefs[attrib].dady[chan];
2913    const float delta = ofs_x * dadx + ofs_y * dady;
2914    out_chan->f[0] += delta;
2915    out_chan->f[1] += delta;
2916    out_chan->f[2] += delta;
2917    out_chan->f[3] += delta;
2918 }
2919 
2920 static void
eval_linear_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2921 eval_linear_coef(struct tgsi_exec_machine *mach,
2922                  unsigned attrib,
2923                  unsigned chan)
2924 {
2925    const float x = mach->QuadPos.xyzw[0].f[0];
2926    const float y = mach->QuadPos.xyzw[1].f[0];
2927    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2928    const float dady = mach->InterpCoefs[attrib].dady[chan];
2929    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2930 
2931    mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2932    mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2933    mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2934    mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2935 }
2936 
2937 /**
2938  * Evaluate a perspective-valued coefficient at the position of the
2939  * current quad.
2940  */
2941 
2942 static void
interp_perspective_offset(const struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan,float ofs_x,float ofs_y,union tgsi_exec_channel * out_chan)2943 interp_perspective_offset(
2944    const struct tgsi_exec_machine *mach,
2945    unsigned attrib,
2946    unsigned chan,
2947    float ofs_x,
2948    float ofs_y,
2949    union tgsi_exec_channel *out_chan)
2950 {
2951    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2952    const float dady = mach->InterpCoefs[attrib].dady[chan];
2953    const float *w = mach->QuadPos.xyzw[3].f;
2954    const float delta = ofs_x * dadx + ofs_y * dady;
2955    out_chan->f[0] += delta / w[0];
2956    out_chan->f[1] += delta / w[1];
2957    out_chan->f[2] += delta / w[2];
2958    out_chan->f[3] += delta / w[3];
2959 }
2960 
2961 static void
eval_perspective_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2962 eval_perspective_coef(
2963    struct tgsi_exec_machine *mach,
2964    unsigned attrib,
2965    unsigned chan )
2966 {
2967    const float x = mach->QuadPos.xyzw[0].f[0];
2968    const float y = mach->QuadPos.xyzw[1].f[0];
2969    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2970    const float dady = mach->InterpCoefs[attrib].dady[chan];
2971    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2972    const float *w = mach->QuadPos.xyzw[3].f;
2973    /* divide by W here */
2974    mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2975    mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2976    mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2977    mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2978 }
2979 
2980 
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel
 * `chan` of input attribute `attrib` into the machine's input registers.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2985 
2986 static void
exec_declaration(struct tgsi_exec_machine * mach,const struct tgsi_full_declaration * decl)2987 exec_declaration(struct tgsi_exec_machine *mach,
2988                  const struct tgsi_full_declaration *decl)
2989 {
2990    if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2991       mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2992       return;
2993    }
2994 
2995    if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
2996       if (decl->Declaration.File == TGSI_FILE_INPUT) {
2997          uint first, last, mask;
2998 
2999          first = decl->Range.First;
3000          last = decl->Range.Last;
3001          mask = decl->Declaration.UsageMask;
3002 
3003          /* XXX we could remove this special-case code since
3004           * mach->InterpCoefs[first].a0 should already have the
3005           * front/back-face value.  But we should first update the
3006           * ureg code to emit the right UsageMask value (WRITEMASK_X).
3007           * Then, we could remove the tgsi_exec_machine::Face field.
3008           */
3009          /* XXX make FACE a system value */
3010          if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
3011             uint i;
3012 
3013             assert(decl->Semantic.Index == 0);
3014             assert(first == last);
3015 
3016             for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3017                mach->Inputs[first].xyzw[0].f[i] = mach->Face;
3018             }
3019          } else {
3020             eval_coef_func eval;
3021             apply_sample_offset_func interp;
3022             uint i, j;
3023 
3024             switch (decl->Interp.Interpolate) {
3025             case TGSI_INTERPOLATE_CONSTANT:
3026                eval = eval_constant_coef;
3027                interp = interp_constant_offset;
3028                break;
3029 
3030             case TGSI_INTERPOLATE_LINEAR:
3031                eval = eval_linear_coef;
3032                interp = interp_linear_offset;
3033                break;
3034 
3035             case TGSI_INTERPOLATE_PERSPECTIVE:
3036                eval = eval_perspective_coef;
3037                interp = interp_perspective_offset;
3038                break;
3039 
3040             case TGSI_INTERPOLATE_COLOR:
3041                eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
3042                interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;
3043                break;
3044 
3045             default:
3046                assert(0);
3047                return;
3048             }
3049 
3050             for (i = first; i <= last; i++)
3051                mach->InputSampleOffsetApply[i] = interp;
3052 
3053             for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3054                if (mask & (1 << j)) {
3055                   for (i = first; i <= last; i++) {
3056                      eval(mach, i, j);
3057                   }
3058                }
3059             }
3060          }
3061 
3062          if (DEBUG_EXECUTION) {
3063             uint i, j;
3064             for (i = first; i <= last; ++i) {
3065                debug_printf("IN[%2u] = ", i);
3066                for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3067                   if (j > 0) {
3068                      debug_printf("         ");
3069                   }
3070                   debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3071                                mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
3072                                mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
3073                                mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
3074                                mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
3075                }
3076             }
3077          }
3078       }
3079    }
3080 
3081 }
3082 
/** Per-channel operation taking one source channel and writing one result. */
typedef void (*micro_unary_op)(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src);
3085 
3086 static void
exec_scalar_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3087 exec_scalar_unary(struct tgsi_exec_machine *mach,
3088                   const struct tgsi_full_instruction *inst,
3089                   micro_unary_op op,
3090                   enum tgsi_exec_datatype dst_datatype,
3091                   enum tgsi_exec_datatype src_datatype)
3092 {
3093    unsigned int chan;
3094    union tgsi_exec_channel src;
3095    union tgsi_exec_channel dst;
3096 
3097    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
3098    op(&dst, &src);
3099    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3100       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3101          store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3102       }
3103    }
3104 }
3105 
3106 static void
exec_vector_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3107 exec_vector_unary(struct tgsi_exec_machine *mach,
3108                   const struct tgsi_full_instruction *inst,
3109                   micro_unary_op op,
3110                   enum tgsi_exec_datatype dst_datatype,
3111                   enum tgsi_exec_datatype src_datatype)
3112 {
3113    unsigned int chan;
3114    struct tgsi_exec_vector dst;
3115 
3116    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3117       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3118          union tgsi_exec_channel src;
3119 
3120          fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
3121          op(&dst.xyzw[chan], &src);
3122       }
3123    }
3124    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3125       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3126          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3127       }
3128    }
3129 }
3130 
/** Per-channel operation taking two source channels and writing one result. */
typedef void (*micro_binary_op)(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1);
3134 
3135 static void
exec_scalar_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3136 exec_scalar_binary(struct tgsi_exec_machine *mach,
3137                    const struct tgsi_full_instruction *inst,
3138                    micro_binary_op op,
3139                    enum tgsi_exec_datatype dst_datatype,
3140                    enum tgsi_exec_datatype src_datatype)
3141 {
3142    unsigned int chan;
3143    union tgsi_exec_channel src[2];
3144    union tgsi_exec_channel dst;
3145 
3146    fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
3147    fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
3148    op(&dst, &src[0], &src[1]);
3149    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3150       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3151          store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3152       }
3153    }
3154 }
3155 
3156 static void
exec_vector_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3157 exec_vector_binary(struct tgsi_exec_machine *mach,
3158                    const struct tgsi_full_instruction *inst,
3159                    micro_binary_op op,
3160                    enum tgsi_exec_datatype dst_datatype,
3161                    enum tgsi_exec_datatype src_datatype)
3162 {
3163    unsigned int chan;
3164    struct tgsi_exec_vector dst;
3165 
3166    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3167       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3168          union tgsi_exec_channel src[2];
3169 
3170          fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3171          fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3172          op(&dst.xyzw[chan], &src[0], &src[1]);
3173       }
3174    }
3175    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3176       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3177          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3178       }
3179    }
3180 }
3181 
/** Per-channel operation taking three source channels and writing one result. */
typedef void (*micro_trinary_op)(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2);
3186 
3187 static void
exec_vector_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_trinary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3188 exec_vector_trinary(struct tgsi_exec_machine *mach,
3189                     const struct tgsi_full_instruction *inst,
3190                     micro_trinary_op op,
3191                     enum tgsi_exec_datatype dst_datatype,
3192                     enum tgsi_exec_datatype src_datatype)
3193 {
3194    unsigned int chan;
3195    struct tgsi_exec_vector dst;
3196 
3197    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3198       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3199          union tgsi_exec_channel src[3];
3200 
3201          fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3202          fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3203          fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3204          op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3205       }
3206    }
3207    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3208       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3209          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3210       }
3211    }
3212 }
3213 
/**
 * Signature of a per-channel operation that reads four source channels
 * and writes one destination channel.
 */
typedef void (*micro_quaternary_op)(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3);
3219 
3220 static void
exec_vector_quaternary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_quaternary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3221 exec_vector_quaternary(struct tgsi_exec_machine *mach,
3222                        const struct tgsi_full_instruction *inst,
3223                        micro_quaternary_op op,
3224                        enum tgsi_exec_datatype dst_datatype,
3225                        enum tgsi_exec_datatype src_datatype)
3226 {
3227    unsigned int chan;
3228    struct tgsi_exec_vector dst;
3229 
3230    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3231       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3232          union tgsi_exec_channel src[4];
3233 
3234          fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3235          fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3236          fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3237          fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3238          op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3239       }
3240    }
3241    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3242       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3243          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3244       }
3245    }
3246 }
3247 
3248 static void
exec_dp3(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3249 exec_dp3(struct tgsi_exec_machine *mach,
3250          const struct tgsi_full_instruction *inst)
3251 {
3252    unsigned int chan;
3253    union tgsi_exec_channel arg[3];
3254 
3255    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3256    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3257    micro_mul(&arg[2], &arg[0], &arg[1]);
3258 
3259    for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3260       fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3261       fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3262       micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3263    }
3264 
3265    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3266       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3267          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3268       }
3269    }
3270 }
3271 
3272 static void
exec_dp4(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3273 exec_dp4(struct tgsi_exec_machine *mach,
3274          const struct tgsi_full_instruction *inst)
3275 {
3276    unsigned int chan;
3277    union tgsi_exec_channel arg[3];
3278 
3279    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3280    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3281    micro_mul(&arg[2], &arg[0], &arg[1]);
3282 
3283    for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3284       fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3285       fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3286       micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3287    }
3288 
3289    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3290       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3291          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3292       }
3293    }
3294 }
3295 
3296 static void
exec_dp2(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3297 exec_dp2(struct tgsi_exec_machine *mach,
3298          const struct tgsi_full_instruction *inst)
3299 {
3300    unsigned int chan;
3301    union tgsi_exec_channel arg[3];
3302 
3303    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3304    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3305    micro_mul(&arg[2], &arg[0], &arg[1]);
3306 
3307    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3308    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3309    micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3310 
3311    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3312       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3313          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3314       }
3315    }
3316 }
3317 
3318 static void
exec_pk2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3319 exec_pk2h(struct tgsi_exec_machine *mach,
3320           const struct tgsi_full_instruction *inst)
3321 {
3322    unsigned chan;
3323    union tgsi_exec_channel arg[2], dst;
3324 
3325    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3326    fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3327    for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3328       dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) |
3329          (_mesa_float_to_half(arg[1].f[chan]) << 16);
3330    }
3331    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3332       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3333          store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
3334       }
3335    }
3336 }
3337 
3338 static void
exec_up2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3339 exec_up2h(struct tgsi_exec_machine *mach,
3340           const struct tgsi_full_instruction *inst)
3341 {
3342    unsigned chan;
3343    union tgsi_exec_channel arg, dst[2];
3344 
3345    fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3346    for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3347       dst[0].f[chan] = _mesa_half_to_float(arg.u[chan] & 0xffff);
3348       dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16);
3349    }
3350    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3351       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3352          store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3353       }
3354    }
3355 }
3356 
3357 static void
micro_ucmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)3358 micro_ucmp(union tgsi_exec_channel *dst,
3359            const union tgsi_exec_channel *src0,
3360            const union tgsi_exec_channel *src1,
3361            const union tgsi_exec_channel *src2)
3362 {
3363    dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
3364    dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
3365    dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
3366    dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
3367 }
3368 
3369 static void
exec_ucmp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3370 exec_ucmp(struct tgsi_exec_machine *mach,
3371           const struct tgsi_full_instruction *inst)
3372 {
3373    unsigned int chan;
3374    struct tgsi_exec_vector dst;
3375 
3376    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3377       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3378          union tgsi_exec_channel src[3];
3379 
3380          fetch_source(mach, &src[0], &inst->Src[0], chan,
3381                       TGSI_EXEC_DATA_UINT);
3382          fetch_source(mach, &src[1], &inst->Src[1], chan,
3383                       TGSI_EXEC_DATA_FLOAT);
3384          fetch_source(mach, &src[2], &inst->Src[2], chan,
3385                       TGSI_EXEC_DATA_FLOAT);
3386          micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3387       }
3388    }
3389    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3390       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3391          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan,
3392                     TGSI_EXEC_DATA_FLOAT);
3393       }
3394    }
3395 }
3396 
3397 static void
exec_dst(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3398 exec_dst(struct tgsi_exec_machine *mach,
3399          const struct tgsi_full_instruction *inst)
3400 {
3401    union tgsi_exec_channel r[2];
3402    union tgsi_exec_channel d[4];
3403 
3404    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3405       fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3406       fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3407       micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3408    }
3409    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3410       fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3411    }
3412    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3413       fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3414    }
3415 
3416    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3417       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3418    }
3419    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3420       store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3421    }
3422    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3423       store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3424    }
3425    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3426       store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3427    }
3428 }
3429 
3430 static void
exec_log(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3431 exec_log(struct tgsi_exec_machine *mach,
3432          const struct tgsi_full_instruction *inst)
3433 {
3434    union tgsi_exec_channel r[3];
3435 
3436    fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3437    micro_abs(&r[2], &r[0]);  /* r2 = abs(r0) */
3438    micro_lg2(&r[1], &r[2]);  /* r1 = lg2(r2) */
3439    micro_flr(&r[0], &r[1]);  /* r0 = floor(r1) */
3440    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3441       store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3442    }
3443    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3444       micro_exp2(&r[0], &r[0]);       /* r0 = 2 ^ r0 */
3445       micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3446       store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3447    }
3448    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3449       store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3450    }
3451    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3452       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3453    }
3454 }
3455 
3456 static void
exec_exp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3457 exec_exp(struct tgsi_exec_machine *mach,
3458          const struct tgsi_full_instruction *inst)
3459 {
3460    union tgsi_exec_channel r[3];
3461 
3462    fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3463    micro_flr(&r[1], &r[0]);  /* r1 = floor(r0) */
3464    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3465       micro_exp2(&r[2], &r[1]);       /* r2 = 2 ^ r1 */
3466       store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3467    }
3468    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3469       micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3470       store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3471    }
3472    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3473       micro_exp2(&r[2], &r[0]);       /* r2 = 2 ^ r0 */
3474       store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3475    }
3476    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3477       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3478    }
3479 }
3480 
3481 static void
exec_lit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3482 exec_lit(struct tgsi_exec_machine *mach,
3483          const struct tgsi_full_instruction *inst)
3484 {
3485    union tgsi_exec_channel r[3];
3486    union tgsi_exec_channel d[3];
3487 
3488    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3489       fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3490       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3491          fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3492          micro_max(&r[1], &r[1], &ZeroVec);
3493 
3494          fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3495          micro_min(&r[2], &r[2], &P128Vec);
3496          micro_max(&r[2], &r[2], &M128Vec);
3497          micro_pow(&r[1], &r[1], &r[2]);
3498          micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3499          store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3500       }
3501       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3502          micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3503          store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3504       }
3505    }
3506    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3507       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3508    }
3509 
3510    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3511       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3512    }
3513 }
3514 
3515 static void
exec_break(struct tgsi_exec_machine * mach)3516 exec_break(struct tgsi_exec_machine *mach)
3517 {
3518    if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3519       /* turn off loop channels for each enabled exec channel */
3520       mach->LoopMask &= ~mach->ExecMask;
3521       /* Todo: if mach->LoopMask == 0, jump to end of loop */
3522       UPDATE_EXEC_MASK(mach);
3523    } else {
3524       assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3525 
3526       mach->Switch.mask = 0x0;
3527 
3528       UPDATE_EXEC_MASK(mach);
3529    }
3530 }
3531 
3532 static void
exec_switch(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3533 exec_switch(struct tgsi_exec_machine *mach,
3534             const struct tgsi_full_instruction *inst)
3535 {
3536    assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3537    assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3538 
3539    mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3540    fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3541    mach->Switch.mask = 0x0;
3542    mach->Switch.defaultMask = 0x0;
3543 
3544    mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3545    mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3546 
3547    UPDATE_EXEC_MASK(mach);
3548 }
3549 
3550 static void
exec_case(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3551 exec_case(struct tgsi_exec_machine *mach,
3552           const struct tgsi_full_instruction *inst)
3553 {
3554    uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3555    union tgsi_exec_channel src;
3556    uint mask = 0;
3557 
3558    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3559 
3560    if (mach->Switch.selector.u[0] == src.u[0]) {
3561       mask |= 0x1;
3562    }
3563    if (mach->Switch.selector.u[1] == src.u[1]) {
3564       mask |= 0x2;
3565    }
3566    if (mach->Switch.selector.u[2] == src.u[2]) {
3567       mask |= 0x4;
3568    }
3569    if (mach->Switch.selector.u[3] == src.u[3]) {
3570       mask |= 0x8;
3571    }
3572 
3573    mach->Switch.defaultMask |= mask;
3574 
3575    mach->Switch.mask |= mask & prevMask;
3576 
3577    UPDATE_EXEC_MASK(mach);
3578 }
3579 
3580 /* FIXME: this will only work if default is last */
3581 static void
exec_default(struct tgsi_exec_machine * mach)3582 exec_default(struct tgsi_exec_machine *mach)
3583 {
3584    uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3585 
3586    mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3587 
3588    UPDATE_EXEC_MASK(mach);
3589 }
3590 
3591 static void
exec_endswitch(struct tgsi_exec_machine * mach)3592 exec_endswitch(struct tgsi_exec_machine *mach)
3593 {
3594    mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3595    mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3596 
3597    UPDATE_EXEC_MASK(mach);
3598 }
3599 
/** Operation with a double-channel destination and double-channel source. */
typedef void (*micro_dop)(
   union tgsi_double_channel *dst,
   const union tgsi_double_channel *src);

/**
 * Operation with a double-channel destination, one double-channel source
 * and one regular (32-bit) channel source.
 */
typedef void (*micro_dop_sop)(
   union tgsi_double_channel *dst,
   const union tgsi_double_channel *src0,
   union tgsi_exec_channel *src1);

/** Operation with a double-channel destination and a regular channel source. */
typedef void (*micro_dop_s)(
   union tgsi_double_channel *dst,
   const union tgsi_exec_channel *src);

/** Operation with a regular channel destination and a double-channel source. */
typedef void (*micro_sop_d)(
   union tgsi_exec_channel *dst,
   const union tgsi_double_channel *src);
3612 
3613 static void
fetch_double_channel(struct tgsi_exec_machine * mach,union tgsi_double_channel * chan,const struct tgsi_full_src_register * reg,uint chan_0,uint chan_1)3614 fetch_double_channel(struct tgsi_exec_machine *mach,
3615                      union tgsi_double_channel *chan,
3616                      const struct tgsi_full_src_register *reg,
3617                      uint chan_0,
3618                      uint chan_1)
3619 {
3620    union tgsi_exec_channel src[2];
3621    uint i;
3622 
3623    fetch_source_d(mach, &src[0], reg, chan_0);
3624    fetch_source_d(mach, &src[1], reg, chan_1);
3625 
3626    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3627       chan->u[i][0] = src[0].u[i];
3628       chan->u[i][1] = src[1].u[i];
3629    }
3630    if (reg->Register.Absolute) {
3631       micro_dabs(chan, chan);
3632    }
3633    if (reg->Register.Negate) {
3634       micro_dneg(chan, chan);
3635    }
3636 }
3637 
3638 static void
store_double_channel(struct tgsi_exec_machine * mach,const union tgsi_double_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_0,uint chan_1)3639 store_double_channel(struct tgsi_exec_machine *mach,
3640                      const union tgsi_double_channel *chan,
3641                      const struct tgsi_full_dst_register *reg,
3642                      const struct tgsi_full_instruction *inst,
3643                      uint chan_0,
3644                      uint chan_1)
3645 {
3646    union tgsi_exec_channel dst[2];
3647    uint i;
3648    union tgsi_double_channel temp;
3649    const uint execmask = mach->ExecMask;
3650 
3651    if (!inst->Instruction.Saturate) {
3652       for (i = 0; i < TGSI_QUAD_SIZE; i++)
3653          if (execmask & (1 << i)) {
3654             dst[0].u[i] = chan->u[i][0];
3655             dst[1].u[i] = chan->u[i][1];
3656          }
3657    }
3658    else {
3659       for (i = 0; i < TGSI_QUAD_SIZE; i++)
3660          if (execmask & (1 << i)) {
3661             if (chan->d[i] < 0.0)
3662                temp.d[i] = 0.0;
3663             else if (chan->d[i] > 1.0)
3664                temp.d[i] = 1.0;
3665             else
3666                temp.d[i] = chan->d[i];
3667 
3668             dst[0].u[i] = temp.u[i][0];
3669             dst[1].u[i] = temp.u[i][1];
3670          }
3671    }
3672 
3673    store_dest_double(mach, &dst[0], reg, chan_0, TGSI_EXEC_DATA_UINT);
3674    if (chan_1 != (unsigned)-1)
3675       store_dest_double(mach, &dst[1], reg, chan_1, TGSI_EXEC_DATA_UINT);
3676 }
3677 
3678 static void
exec_double_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3679 exec_double_unary(struct tgsi_exec_machine *mach,
3680                   const struct tgsi_full_instruction *inst,
3681                   micro_dop op)
3682 {
3683    union tgsi_double_channel src;
3684    union tgsi_double_channel dst;
3685 
3686    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3687       fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3688       op(&dst, &src);
3689       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3690    }
3691    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3692       fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3693       op(&dst, &src);
3694       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3695    }
3696 }
3697 
3698 static void
exec_double_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op,enum tgsi_exec_datatype dst_datatype)3699 exec_double_binary(struct tgsi_exec_machine *mach,
3700                    const struct tgsi_full_instruction *inst,
3701                    micro_dop op,
3702                    enum tgsi_exec_datatype dst_datatype)
3703 {
3704    union tgsi_double_channel src[2];
3705    union tgsi_double_channel dst;
3706    int first_dest_chan, second_dest_chan;
3707    int wmask;
3708 
3709    wmask = inst->Dst[0].Register.WriteMask;
3710    /* these are & because of the way DSLT etc store their destinations */
3711    if (wmask & TGSI_WRITEMASK_XY) {
3712       first_dest_chan = TGSI_CHAN_X;
3713       second_dest_chan = TGSI_CHAN_Y;
3714       if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3715          first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
3716          second_dest_chan = -1;
3717       }
3718 
3719       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3720       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3721       op(&dst, src);
3722       store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3723    }
3724 
3725    if (wmask & TGSI_WRITEMASK_ZW) {
3726       first_dest_chan = TGSI_CHAN_Z;
3727       second_dest_chan = TGSI_CHAN_W;
3728       if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3729          first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
3730          second_dest_chan = -1;
3731       }
3732 
3733       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3734       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3735       op(&dst, src);
3736       store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3737    }
3738 }
3739 
3740 static void
exec_double_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3741 exec_double_trinary(struct tgsi_exec_machine *mach,
3742                     const struct tgsi_full_instruction *inst,
3743                     micro_dop op)
3744 {
3745    union tgsi_double_channel src[3];
3746    union tgsi_double_channel dst;
3747 
3748    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3749       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3750       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3751       fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
3752       op(&dst, src);
3753       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3754    }
3755    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3756       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3757       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3758       fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
3759       op(&dst, src);
3760       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3761    }
3762 }
3763 
3764 static void
exec_dldexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3765 exec_dldexp(struct tgsi_exec_machine *mach,
3766             const struct tgsi_full_instruction *inst)
3767 {
3768    union tgsi_double_channel src0;
3769    union tgsi_exec_channel src1;
3770    union tgsi_double_channel dst;
3771    int wmask;
3772 
3773    wmask = inst->Dst[0].Register.WriteMask;
3774    if (wmask & TGSI_WRITEMASK_XY) {
3775       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3776       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3777       micro_dldexp(&dst, &src0, &src1);
3778       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3779    }
3780 
3781    if (wmask & TGSI_WRITEMASK_ZW) {
3782       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3783       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3784       micro_dldexp(&dst, &src0, &src1);
3785       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3786    }
3787 }
3788 
3789 static void
exec_dfracexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3790 exec_dfracexp(struct tgsi_exec_machine *mach,
3791               const struct tgsi_full_instruction *inst)
3792 {
3793    union tgsi_double_channel src;
3794    union tgsi_double_channel dst;
3795    union tgsi_exec_channel dst_exp;
3796 
3797    fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3798    micro_dfracexp(&dst, &dst_exp, &src);
3799    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)
3800       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3801    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)
3802       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3803    for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3804       if (inst->Dst[1].Register.WriteMask & (1 << chan))
3805          store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT);
3806    }
3807 }
3808 
3809 static void
exec_arg0_64_arg1_32(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_sop op)3810 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
3811             const struct tgsi_full_instruction *inst,
3812             micro_dop_sop op)
3813 {
3814    union tgsi_double_channel src0;
3815    union tgsi_exec_channel src1;
3816    union tgsi_double_channel dst;
3817    int wmask;
3818 
3819    wmask = inst->Dst[0].Register.WriteMask;
3820    if (wmask & TGSI_WRITEMASK_XY) {
3821       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3822       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3823       op(&dst, &src0, &src1);
3824       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3825    }
3826 
3827    if (wmask & TGSI_WRITEMASK_ZW) {
3828       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3829       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3830       op(&dst, &src0, &src1);
3831       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3832    }
3833 }
3834 
3835 static int
get_image_coord_dim(unsigned tgsi_tex)3836 get_image_coord_dim(unsigned tgsi_tex)
3837 {
3838    int dim;
3839    switch (tgsi_tex) {
3840    case TGSI_TEXTURE_BUFFER:
3841    case TGSI_TEXTURE_1D:
3842       dim = 1;
3843       break;
3844    case TGSI_TEXTURE_2D:
3845    case TGSI_TEXTURE_RECT:
3846    case TGSI_TEXTURE_1D_ARRAY:
3847    case TGSI_TEXTURE_2D_MSAA:
3848       dim = 2;
3849       break;
3850    case TGSI_TEXTURE_3D:
3851    case TGSI_TEXTURE_CUBE:
3852    case TGSI_TEXTURE_2D_ARRAY:
3853    case TGSI_TEXTURE_2D_ARRAY_MSAA:
3854    case TGSI_TEXTURE_CUBE_ARRAY:
3855       dim = 3;
3856       break;
3857    default:
3858       assert(!"unknown texture target");
3859       dim = 0;
3860       break;
3861    }
3862 
3863    return dim;
3864 }
3865 
3866 static int
get_image_coord_sample(unsigned tgsi_tex)3867 get_image_coord_sample(unsigned tgsi_tex)
3868 {
3869    int sample = 0;
3870    switch (tgsi_tex) {
3871    case TGSI_TEXTURE_2D_MSAA:
3872       sample = 3;
3873       break;
3874    case TGSI_TEXTURE_2D_ARRAY_MSAA:
3875       sample = 4;
3876       break;
3877    default:
3878       break;
3879    }
3880    return sample;
3881 }
3882 
3883 static void
exec_load_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3884 exec_load_img(struct tgsi_exec_machine *mach,
3885               const struct tgsi_full_instruction *inst)
3886 {
3887    union tgsi_exec_channel r[4], sample_r;
3888    uint unit;
3889    int sample;
3890    int i, j;
3891    int dim;
3892    uint chan;
3893    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3894    struct tgsi_image_params params;
3895    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3896 
3897    unit = fetch_sampler_unit(mach, inst, 0);
3898    dim = get_image_coord_dim(inst->Memory.Texture);
3899    sample = get_image_coord_sample(inst->Memory.Texture);
3900    assert(dim <= 3);
3901 
3902    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3903    params.unit = unit;
3904    params.tgsi_tex_instr = inst->Memory.Texture;
3905    params.format = inst->Memory.Format;
3906 
3907    for (i = 0; i < dim; i++) {
3908       IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3909    }
3910 
3911    if (sample)
3912       IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3913 
3914    mach->Image->load(mach->Image, &params,
3915                      r[0].i, r[1].i, r[2].i, sample_r.i,
3916                      rgba);
3917    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3918       r[0].f[j] = rgba[0][j];
3919       r[1].f[j] = rgba[1][j];
3920       r[2].f[j] = rgba[2][j];
3921       r[3].f[j] = rgba[3][j];
3922    }
3923    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3924       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3925          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3926       }
3927    }
3928 }
3929 
3930 static void
exec_load_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3931 exec_load_buf(struct tgsi_exec_machine *mach,
3932               const struct tgsi_full_instruction *inst)
3933 {
3934    union tgsi_exec_channel r[4];
3935    uint unit;
3936    int j;
3937    uint chan;
3938    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3939    struct tgsi_buffer_params params;
3940    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3941 
3942    unit = fetch_sampler_unit(mach, inst, 0);
3943 
3944    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3945    params.unit = unit;
3946    IFETCH(&r[0], 1, TGSI_CHAN_X);
3947 
3948    mach->Buffer->load(mach->Buffer, &params,
3949                       r[0].i, rgba);
3950    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3951       r[0].f[j] = rgba[0][j];
3952       r[1].f[j] = rgba[1][j];
3953       r[2].f[j] = rgba[2][j];
3954       r[3].f[j] = rgba[3][j];
3955    }
3956    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3957       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3958          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3959       }
3960    }
3961 }
3962 
3963 static void
exec_load_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3964 exec_load_mem(struct tgsi_exec_machine *mach,
3965               const struct tgsi_full_instruction *inst)
3966 {
3967    union tgsi_exec_channel r[4];
3968    uint chan;
3969    char *ptr = mach->LocalMem;
3970    uint32_t offset;
3971    int j;
3972 
3973    IFETCH(&r[0], 1, TGSI_CHAN_X);
3974    if (r[0].u[0] >= mach->LocalMemSize)
3975       return;
3976 
3977    offset = r[0].u[0];
3978    ptr += offset;
3979 
3980    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3981       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3982          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3983             memcpy(&r[chan].u[j], ptr + (4 * chan), 4);
3984          }
3985       }
3986    }
3987 
3988    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3989       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3990          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3991       }
3992    }
3993 }
3994 
3995 static void
exec_load(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3996 exec_load(struct tgsi_exec_machine *mach,
3997           const struct tgsi_full_instruction *inst)
3998 {
3999    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4000       exec_load_img(mach, inst);
4001    else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4002       exec_load_buf(mach, inst);
4003    else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4004       exec_load_mem(mach, inst);
4005 }
4006 
4007 static uint
fetch_store_img_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_dst_register * dst)4008 fetch_store_img_unit(struct tgsi_exec_machine *mach,
4009                      const struct tgsi_full_dst_register *dst)
4010 {
4011    uint unit = 0;
4012    int i;
4013    if (dst->Register.Indirect) {
4014       union tgsi_exec_channel indir_index, index2;
4015       const uint execmask = mach->ExecMask;
4016       index2.i[0] =
4017       index2.i[1] =
4018       index2.i[2] =
4019       index2.i[3] = dst->Indirect.Index;
4020 
4021       fetch_src_file_channel(mach,
4022                              dst->Indirect.File,
4023                              dst->Indirect.Swizzle,
4024                              &index2,
4025                              &ZeroVec,
4026                              &indir_index);
4027       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4028          if (execmask & (1 << i)) {
4029             unit = dst->Register.Index + indir_index.i[i];
4030             break;
4031          }
4032       }
4033    } else {
4034       unit = dst->Register.Index;
4035    }
4036    return unit;
4037 }
4038 
4039 static void
exec_store_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4040 exec_store_img(struct tgsi_exec_machine *mach,
4041                const struct tgsi_full_instruction *inst)
4042 {
4043    union tgsi_exec_channel r[3], sample_r;
4044    union tgsi_exec_channel value[4];
4045    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4046    struct tgsi_image_params params;
4047    int dim;
4048    int sample;
4049    int i, j;
4050    uint unit;
4051    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4052    unit = fetch_store_img_unit(mach, &inst->Dst[0]);
4053    dim = get_image_coord_dim(inst->Memory.Texture);
4054    sample = get_image_coord_sample(inst->Memory.Texture);
4055    assert(dim <= 3);
4056 
4057    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4058    params.unit = unit;
4059    params.tgsi_tex_instr = inst->Memory.Texture;
4060    params.format = inst->Memory.Format;
4061 
4062    for (i = 0; i < dim; i++) {
4063       IFETCH(&r[i], 0, TGSI_CHAN_X + i);
4064    }
4065 
4066    for (i = 0; i < 4; i++) {
4067       FETCH(&value[i], 1, TGSI_CHAN_X + i);
4068    }
4069    if (sample)
4070       IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
4071 
4072    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4073       rgba[0][j] = value[0].f[j];
4074       rgba[1][j] = value[1].f[j];
4075       rgba[2][j] = value[2].f[j];
4076       rgba[3][j] = value[3].f[j];
4077    }
4078 
4079    mach->Image->store(mach->Image, &params,
4080                       r[0].i, r[1].i, r[2].i, sample_r.i,
4081                       rgba);
4082 }
4083 
4084 static void
exec_store_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4085 exec_store_buf(struct tgsi_exec_machine *mach,
4086                const struct tgsi_full_instruction *inst)
4087 {
4088    union tgsi_exec_channel r[3];
4089    union tgsi_exec_channel value[4];
4090    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4091    struct tgsi_buffer_params params;
4092    int i, j;
4093    uint unit;
4094    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4095 
4096    unit = fetch_store_img_unit(mach, &inst->Dst[0]);
4097 
4098    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4099    params.unit = unit;
4100    params.writemask = inst->Dst[0].Register.WriteMask;
4101 
4102    IFETCH(&r[0], 0, TGSI_CHAN_X);
4103    for (i = 0; i < 4; i++) {
4104       FETCH(&value[i], 1, TGSI_CHAN_X + i);
4105    }
4106 
4107    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4108       rgba[0][j] = value[0].f[j];
4109       rgba[1][j] = value[1].f[j];
4110       rgba[2][j] = value[2].f[j];
4111       rgba[3][j] = value[3].f[j];
4112    }
4113 
4114    mach->Buffer->store(mach->Buffer, &params,
4115                       r[0].i,
4116                       rgba);
4117 }
4118 
4119 static void
exec_store_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4120 exec_store_mem(struct tgsi_exec_machine *mach,
4121                const struct tgsi_full_instruction *inst)
4122 {
4123    union tgsi_exec_channel r[3];
4124    union tgsi_exec_channel value[4];
4125    uint i, chan;
4126    char *ptr = mach->LocalMem;
4127    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4128    int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4129 
4130    IFETCH(&r[0], 0, TGSI_CHAN_X);
4131 
4132    for (i = 0; i < 4; i++) {
4133       FETCH(&value[i], 1, TGSI_CHAN_X + i);
4134    }
4135 
4136    if (r[0].u[0] >= mach->LocalMemSize)
4137       return;
4138    ptr += r[0].u[0];
4139 
4140    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4141       if (execmask & (1 << i)) {
4142          for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4143             if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4144                memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
4145             }
4146          }
4147       }
4148    }
4149 }
4150 
4151 static void
exec_store(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4152 exec_store(struct tgsi_exec_machine *mach,
4153            const struct tgsi_full_instruction *inst)
4154 {
4155    if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
4156       exec_store_img(mach, inst);
4157    else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
4158       exec_store_buf(mach, inst);
4159    else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
4160       exec_store_mem(mach, inst);
4161 }
4162 
4163 static void
exec_atomop_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4164 exec_atomop_img(struct tgsi_exec_machine *mach,
4165                 const struct tgsi_full_instruction *inst)
4166 {
4167    union tgsi_exec_channel r[4], sample_r;
4168    union tgsi_exec_channel value[4], value2[4];
4169    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4170    float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4171    struct tgsi_image_params params;
4172    int dim;
4173    int sample;
4174    int i, j;
4175    uint unit, chan;
4176    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4177    unit = fetch_sampler_unit(mach, inst, 0);
4178    dim = get_image_coord_dim(inst->Memory.Texture);
4179    sample = get_image_coord_sample(inst->Memory.Texture);
4180    assert(dim <= 3);
4181 
4182    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4183    params.unit = unit;
4184    params.tgsi_tex_instr = inst->Memory.Texture;
4185    params.format = inst->Memory.Format;
4186 
4187    for (i = 0; i < dim; i++) {
4188       IFETCH(&r[i], 1, TGSI_CHAN_X + i);
4189    }
4190 
4191    for (i = 0; i < 4; i++) {
4192       FETCH(&value[i], 2, TGSI_CHAN_X + i);
4193       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4194          FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4195    }
4196    if (sample)
4197       IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
4198 
4199    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4200       rgba[0][j] = value[0].f[j];
4201       rgba[1][j] = value[1].f[j];
4202       rgba[2][j] = value[2].f[j];
4203       rgba[3][j] = value[3].f[j];
4204    }
4205    if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4206       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4207          rgba2[0][j] = value2[0].f[j];
4208          rgba2[1][j] = value2[1].f[j];
4209          rgba2[2][j] = value2[2].f[j];
4210          rgba2[3][j] = value2[3].f[j];
4211       }
4212    }
4213 
4214    mach->Image->op(mach->Image, &params, inst->Instruction.Opcode,
4215                    r[0].i, r[1].i, r[2].i, sample_r.i,
4216                    rgba, rgba2);
4217 
4218    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4219       r[0].f[j] = rgba[0][j];
4220       r[1].f[j] = rgba[1][j];
4221       r[2].f[j] = rgba[2][j];
4222       r[3].f[j] = rgba[3][j];
4223    }
4224    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4225       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4226          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4227       }
4228    }
4229 }
4230 
4231 static void
exec_atomop_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4232 exec_atomop_buf(struct tgsi_exec_machine *mach,
4233                 const struct tgsi_full_instruction *inst)
4234 {
4235    union tgsi_exec_channel r[4];
4236    union tgsi_exec_channel value[4], value2[4];
4237    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4238    float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4239    struct tgsi_buffer_params params;
4240    int i, j;
4241    uint unit, chan;
4242    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4243 
4244    unit = fetch_sampler_unit(mach, inst, 0);
4245 
4246    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4247    params.unit = unit;
4248    params.writemask = inst->Dst[0].Register.WriteMask;
4249 
4250    IFETCH(&r[0], 1, TGSI_CHAN_X);
4251 
4252    for (i = 0; i < 4; i++) {
4253       FETCH(&value[i], 2, TGSI_CHAN_X + i);
4254       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4255          FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4256    }
4257 
4258    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4259       rgba[0][j] = value[0].f[j];
4260       rgba[1][j] = value[1].f[j];
4261       rgba[2][j] = value[2].f[j];
4262       rgba[3][j] = value[3].f[j];
4263    }
4264    if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4265       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4266          rgba2[0][j] = value2[0].f[j];
4267          rgba2[1][j] = value2[1].f[j];
4268          rgba2[2][j] = value2[2].f[j];
4269          rgba2[3][j] = value2[3].f[j];
4270       }
4271    }
4272 
4273    mach->Buffer->op(mach->Buffer, &params, inst->Instruction.Opcode,
4274                    r[0].i,
4275                    rgba, rgba2);
4276 
4277    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4278       r[0].f[j] = rgba[0][j];
4279       r[1].f[j] = rgba[1][j];
4280       r[2].f[j] = rgba[2][j];
4281       r[3].f[j] = rgba[3][j];
4282    }
4283    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4284       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4285          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4286       }
4287    }
4288 }
4289 
4290 static void
exec_atomop_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4291 exec_atomop_mem(struct tgsi_exec_machine *mach,
4292                 const struct tgsi_full_instruction *inst)
4293 {
4294    union tgsi_exec_channel r[4];
4295    union tgsi_exec_channel value[4], value2[4];
4296    char *ptr = mach->LocalMem;
4297    uint32_t val;
4298    uint chan, i;
4299    uint32_t offset;
4300    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4301    int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4302    IFETCH(&r[0], 1, TGSI_CHAN_X);
4303 
4304    if (r[0].u[0] >= mach->LocalMemSize)
4305       return;
4306 
4307    offset = r[0].u[0];
4308    ptr += offset;
4309    for (i = 0; i < 4; i++) {
4310       FETCH(&value[i], 2, TGSI_CHAN_X + i);
4311       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4312          FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4313    }
4314 
4315    memcpy(&r[0].u[0], ptr, 4);
4316    val = r[0].u[0];
4317    switch (inst->Instruction.Opcode) {
4318    case TGSI_OPCODE_ATOMUADD:
4319       val += value[0].u[0];
4320       break;
4321    case TGSI_OPCODE_ATOMXOR:
4322       val ^= value[0].u[0];
4323       break;
4324    case TGSI_OPCODE_ATOMOR:
4325       val |= value[0].u[0];
4326       break;
4327    case TGSI_OPCODE_ATOMAND:
4328       val &= value[0].u[0];
4329       break;
4330    case TGSI_OPCODE_ATOMUMIN:
4331       val = MIN2(val, value[0].u[0]);
4332       break;
4333    case TGSI_OPCODE_ATOMUMAX:
4334       val = MAX2(val, value[0].u[0]);
4335       break;
4336    case TGSI_OPCODE_ATOMIMIN:
4337       val = MIN2(r[0].i[0], value[0].i[0]);
4338       break;
4339    case TGSI_OPCODE_ATOMIMAX:
4340       val = MAX2(r[0].i[0], value[0].i[0]);
4341       break;
4342    case TGSI_OPCODE_ATOMXCHG:
4343       val = value[0].i[0];
4344       break;
4345    case TGSI_OPCODE_ATOMCAS:
4346       if (val == value[0].u[0])
4347          val = value2[0].u[0];
4348       break;
4349    case TGSI_OPCODE_ATOMFADD:
4350       val = fui(r[0].f[0] + value[0].f[0]);
4351       break;
4352    default:
4353       break;
4354    }
4355    for (i = 0; i < TGSI_QUAD_SIZE; i++)
4356       if (execmask & (1 << i))
4357          memcpy(ptr, &val, 4);
4358 
4359    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4360       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4361          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4362       }
4363    }
4364 }
4365 
4366 static void
exec_atomop(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4367 exec_atomop(struct tgsi_exec_machine *mach,
4368             const struct tgsi_full_instruction *inst)
4369 {
4370    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4371       exec_atomop_img(mach, inst);
4372    else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4373       exec_atomop_buf(mach, inst);
4374    else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4375       exec_atomop_mem(mach, inst);
4376 }
4377 
4378 static void
exec_resq_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4379 exec_resq_img(struct tgsi_exec_machine *mach,
4380               const struct tgsi_full_instruction *inst)
4381 {
4382    int result[4];
4383    union tgsi_exec_channel r[4];
4384    uint unit;
4385    int i, chan, j;
4386    struct tgsi_image_params params;
4387    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4388 
4389    unit = fetch_sampler_unit(mach, inst, 0);
4390 
4391    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4392    params.unit = unit;
4393    params.tgsi_tex_instr = inst->Memory.Texture;
4394    params.format = inst->Memory.Format;
4395 
4396    mach->Image->get_dims(mach->Image, &params, result);
4397 
4398    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4399       for (j = 0; j < 4; j++) {
4400          r[j].i[i] = result[j];
4401       }
4402    }
4403 
4404    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4405       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4406          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4407                     TGSI_EXEC_DATA_INT);
4408       }
4409    }
4410 }
4411 
4412 static void
exec_resq_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4413 exec_resq_buf(struct tgsi_exec_machine *mach,
4414               const struct tgsi_full_instruction *inst)
4415 {
4416    int result;
4417    union tgsi_exec_channel r[4];
4418    uint unit;
4419    int i, chan;
4420    struct tgsi_buffer_params params;
4421    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4422 
4423    unit = fetch_sampler_unit(mach, inst, 0);
4424 
4425    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4426    params.unit = unit;
4427 
4428    mach->Buffer->get_dims(mach->Buffer, &params, &result);
4429 
4430    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4431       r[0].i[i] = result;
4432    }
4433 
4434    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4435       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4436          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4437                     TGSI_EXEC_DATA_INT);
4438       }
4439    }
4440 }
4441 
4442 static void
exec_resq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4443 exec_resq(struct tgsi_exec_machine *mach,
4444           const struct tgsi_full_instruction *inst)
4445 {
4446    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4447       exec_resq_img(mach, inst);
4448    else
4449       exec_resq_buf(mach, inst);
4450 }
4451 
4452 static void
micro_f2u64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4453 micro_f2u64(union tgsi_double_channel *dst,
4454             const union tgsi_exec_channel *src)
4455 {
4456    dst->u64[0] = (uint64_t)src->f[0];
4457    dst->u64[1] = (uint64_t)src->f[1];
4458    dst->u64[2] = (uint64_t)src->f[2];
4459    dst->u64[3] = (uint64_t)src->f[3];
4460 }
4461 
4462 static void
micro_f2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4463 micro_f2i64(union tgsi_double_channel *dst,
4464             const union tgsi_exec_channel *src)
4465 {
4466    dst->i64[0] = (int64_t)src->f[0];
4467    dst->i64[1] = (int64_t)src->f[1];
4468    dst->i64[2] = (int64_t)src->f[2];
4469    dst->i64[3] = (int64_t)src->f[3];
4470 }
4471 
4472 static void
micro_u2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4473 micro_u2i64(union tgsi_double_channel *dst,
4474             const union tgsi_exec_channel *src)
4475 {
4476    dst->u64[0] = (uint64_t)src->u[0];
4477    dst->u64[1] = (uint64_t)src->u[1];
4478    dst->u64[2] = (uint64_t)src->u[2];
4479    dst->u64[3] = (uint64_t)src->u[3];
4480 }
4481 
4482 static void
micro_i2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4483 micro_i2i64(union tgsi_double_channel *dst,
4484             const union tgsi_exec_channel *src)
4485 {
4486    dst->i64[0] = (int64_t)src->i[0];
4487    dst->i64[1] = (int64_t)src->i[1];
4488    dst->i64[2] = (int64_t)src->i[2];
4489    dst->i64[3] = (int64_t)src->i[3];
4490 }
4491 
4492 static void
micro_d2u64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4493 micro_d2u64(union tgsi_double_channel *dst,
4494            const union tgsi_double_channel *src)
4495 {
4496    dst->u64[0] = (uint64_t)src->d[0];
4497    dst->u64[1] = (uint64_t)src->d[1];
4498    dst->u64[2] = (uint64_t)src->d[2];
4499    dst->u64[3] = (uint64_t)src->d[3];
4500 }
4501 
4502 static void
micro_d2i64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4503 micro_d2i64(union tgsi_double_channel *dst,
4504            const union tgsi_double_channel *src)
4505 {
4506    dst->i64[0] = (int64_t)src->d[0];
4507    dst->i64[1] = (int64_t)src->d[1];
4508    dst->i64[2] = (int64_t)src->d[2];
4509    dst->i64[3] = (int64_t)src->d[3];
4510 }
4511 
4512 static void
micro_u642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4513 micro_u642d(union tgsi_double_channel *dst,
4514            const union tgsi_double_channel *src)
4515 {
4516    dst->d[0] = (double)src->u64[0];
4517    dst->d[1] = (double)src->u64[1];
4518    dst->d[2] = (double)src->u64[2];
4519    dst->d[3] = (double)src->u64[3];
4520 }
4521 
4522 static void
micro_i642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4523 micro_i642d(union tgsi_double_channel *dst,
4524            const union tgsi_double_channel *src)
4525 {
4526    dst->d[0] = (double)src->i64[0];
4527    dst->d[1] = (double)src->i64[1];
4528    dst->d[2] = (double)src->i64[2];
4529    dst->d[3] = (double)src->i64[3];
4530 }
4531 
4532 static void
micro_u642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4533 micro_u642f(union tgsi_exec_channel *dst,
4534             const union tgsi_double_channel *src)
4535 {
4536    dst->f[0] = (float)src->u64[0];
4537    dst->f[1] = (float)src->u64[1];
4538    dst->f[2] = (float)src->u64[2];
4539    dst->f[3] = (float)src->u64[3];
4540 }
4541 
4542 static void
micro_i642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4543 micro_i642f(union tgsi_exec_channel *dst,
4544             const union tgsi_double_channel *src)
4545 {
4546    dst->f[0] = (float)src->i64[0];
4547    dst->f[1] = (float)src->i64[1];
4548    dst->f[2] = (float)src->i64[2];
4549    dst->f[3] = (float)src->i64[3];
4550 }
4551 
4552 static void
exec_t_2_64(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_s op,enum tgsi_exec_datatype src_datatype)4553 exec_t_2_64(struct tgsi_exec_machine *mach,
4554           const struct tgsi_full_instruction *inst,
4555           micro_dop_s op,
4556           enum tgsi_exec_datatype src_datatype)
4557 {
4558    union tgsi_exec_channel src;
4559    union tgsi_double_channel dst;
4560 
4561    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
4562       fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
4563       op(&dst, &src);
4564       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
4565    }
4566    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
4567       fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
4568       op(&dst, &src);
4569       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
4570    }
4571 }
4572 
4573 static void
exec_64_2_t(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_sop_d op,enum tgsi_exec_datatype dst_datatype)4574 exec_64_2_t(struct tgsi_exec_machine *mach,
4575             const struct tgsi_full_instruction *inst,
4576             micro_sop_d op,
4577             enum tgsi_exec_datatype dst_datatype)
4578 {
4579    union tgsi_double_channel src;
4580    union tgsi_exec_channel dst;
4581    int wm = inst->Dst[0].Register.WriteMask;
4582    int i;
4583    int bit;
4584    for (i = 0; i < 2; i++) {
4585       bit = ffs(wm);
4586       if (bit) {
4587          wm &= ~(1 << (bit - 1));
4588          if (i == 0)
4589             fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
4590          else
4591             fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
4592          op(&dst, &src);
4593          store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
4594       }
4595    }
4596 }
4597 
4598 static void
micro_i2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4599 micro_i2f(union tgsi_exec_channel *dst,
4600           const union tgsi_exec_channel *src)
4601 {
4602    dst->f[0] = (float)src->i[0];
4603    dst->f[1] = (float)src->i[1];
4604    dst->f[2] = (float)src->i[2];
4605    dst->f[3] = (float)src->i[3];
4606 }
4607 
4608 static void
micro_not(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4609 micro_not(union tgsi_exec_channel *dst,
4610           const union tgsi_exec_channel *src)
4611 {
4612    dst->u[0] = ~src->u[0];
4613    dst->u[1] = ~src->u[1];
4614    dst->u[2] = ~src->u[2];
4615    dst->u[3] = ~src->u[3];
4616 }
4617 
4618 static void
micro_shl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4619 micro_shl(union tgsi_exec_channel *dst,
4620           const union tgsi_exec_channel *src0,
4621           const union tgsi_exec_channel *src1)
4622 {
4623    unsigned masked_count;
4624    masked_count = src1->u[0] & 0x1f;
4625    dst->u[0] = src0->u[0] << masked_count;
4626    masked_count = src1->u[1] & 0x1f;
4627    dst->u[1] = src0->u[1] << masked_count;
4628    masked_count = src1->u[2] & 0x1f;
4629    dst->u[2] = src0->u[2] << masked_count;
4630    masked_count = src1->u[3] & 0x1f;
4631    dst->u[3] = src0->u[3] << masked_count;
4632 }
4633 
4634 static void
micro_and(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4635 micro_and(union tgsi_exec_channel *dst,
4636           const union tgsi_exec_channel *src0,
4637           const union tgsi_exec_channel *src1)
4638 {
4639    dst->u[0] = src0->u[0] & src1->u[0];
4640    dst->u[1] = src0->u[1] & src1->u[1];
4641    dst->u[2] = src0->u[2] & src1->u[2];
4642    dst->u[3] = src0->u[3] & src1->u[3];
4643 }
4644 
4645 static void
micro_or(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4646 micro_or(union tgsi_exec_channel *dst,
4647          const union tgsi_exec_channel *src0,
4648          const union tgsi_exec_channel *src1)
4649 {
4650    dst->u[0] = src0->u[0] | src1->u[0];
4651    dst->u[1] = src0->u[1] | src1->u[1];
4652    dst->u[2] = src0->u[2] | src1->u[2];
4653    dst->u[3] = src0->u[3] | src1->u[3];
4654 }
4655 
4656 static void
micro_xor(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4657 micro_xor(union tgsi_exec_channel *dst,
4658           const union tgsi_exec_channel *src0,
4659           const union tgsi_exec_channel *src1)
4660 {
4661    dst->u[0] = src0->u[0] ^ src1->u[0];
4662    dst->u[1] = src0->u[1] ^ src1->u[1];
4663    dst->u[2] = src0->u[2] ^ src1->u[2];
4664    dst->u[3] = src0->u[3] ^ src1->u[3];
4665 }
4666 
4667 static void
micro_mod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4668 micro_mod(union tgsi_exec_channel *dst,
4669           const union tgsi_exec_channel *src0,
4670           const union tgsi_exec_channel *src1)
4671 {
4672    dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;
4673    dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;
4674    dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;
4675    dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;
4676 }
4677 
4678 static void
micro_f2i(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4679 micro_f2i(union tgsi_exec_channel *dst,
4680           const union tgsi_exec_channel *src)
4681 {
4682    dst->i[0] = (int)src->f[0];
4683    dst->i[1] = (int)src->f[1];
4684    dst->i[2] = (int)src->f[2];
4685    dst->i[3] = (int)src->f[3];
4686 }
4687 
4688 static void
micro_fseq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4689 micro_fseq(union tgsi_exec_channel *dst,
4690            const union tgsi_exec_channel *src0,
4691            const union tgsi_exec_channel *src1)
4692 {
4693    dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4694    dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4695    dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4696    dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4697 }
4698 
4699 static void
micro_fsge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4700 micro_fsge(union tgsi_exec_channel *dst,
4701            const union tgsi_exec_channel *src0,
4702            const union tgsi_exec_channel *src1)
4703 {
4704    dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4705    dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4706    dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4707    dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4708 }
4709 
4710 static void
micro_fslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4711 micro_fslt(union tgsi_exec_channel *dst,
4712            const union tgsi_exec_channel *src0,
4713            const union tgsi_exec_channel *src1)
4714 {
4715    dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4716    dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4717    dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4718    dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4719 }
4720 
4721 static void
micro_fsne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4722 micro_fsne(union tgsi_exec_channel *dst,
4723            const union tgsi_exec_channel *src0,
4724            const union tgsi_exec_channel *src1)
4725 {
4726    dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4727    dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4728    dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4729    dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4730 }
4731 
4732 static void
micro_idiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4733 micro_idiv(union tgsi_exec_channel *dst,
4734            const union tgsi_exec_channel *src0,
4735            const union tgsi_exec_channel *src1)
4736 {
4737    dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4738    dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4739    dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4740    dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
4741 }
4742 
4743 static void
micro_imax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4744 micro_imax(union tgsi_exec_channel *dst,
4745            const union tgsi_exec_channel *src0,
4746            const union tgsi_exec_channel *src1)
4747 {
4748    dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4749    dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4750    dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4751    dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4752 }
4753 
4754 static void
micro_imin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4755 micro_imin(union tgsi_exec_channel *dst,
4756            const union tgsi_exec_channel *src0,
4757            const union tgsi_exec_channel *src1)
4758 {
4759    dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4760    dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4761    dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4762    dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4763 }
4764 
4765 static void
micro_isge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4766 micro_isge(union tgsi_exec_channel *dst,
4767            const union tgsi_exec_channel *src0,
4768            const union tgsi_exec_channel *src1)
4769 {
4770    dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4771    dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4772    dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4773    dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
4774 }
4775 
4776 static void
micro_ishr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4777 micro_ishr(union tgsi_exec_channel *dst,
4778            const union tgsi_exec_channel *src0,
4779            const union tgsi_exec_channel *src1)
4780 {
4781    unsigned masked_count;
4782    masked_count = src1->i[0] & 0x1f;
4783    dst->i[0] = src0->i[0] >> masked_count;
4784    masked_count = src1->i[1] & 0x1f;
4785    dst->i[1] = src0->i[1] >> masked_count;
4786    masked_count = src1->i[2] & 0x1f;
4787    dst->i[2] = src0->i[2] >> masked_count;
4788    masked_count = src1->i[3] & 0x1f;
4789    dst->i[3] = src0->i[3] >> masked_count;
4790 }
4791 
4792 static void
micro_islt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4793 micro_islt(union tgsi_exec_channel *dst,
4794            const union tgsi_exec_channel *src0,
4795            const union tgsi_exec_channel *src1)
4796 {
4797    dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4798    dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4799    dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4800    dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4801 }
4802 
4803 static void
micro_f2u(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4804 micro_f2u(union tgsi_exec_channel *dst,
4805           const union tgsi_exec_channel *src)
4806 {
4807    dst->u[0] = (uint)src->f[0];
4808    dst->u[1] = (uint)src->f[1];
4809    dst->u[2] = (uint)src->f[2];
4810    dst->u[3] = (uint)src->f[3];
4811 }
4812 
4813 static void
micro_u2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4814 micro_u2f(union tgsi_exec_channel *dst,
4815           const union tgsi_exec_channel *src)
4816 {
4817    dst->f[0] = (float)src->u[0];
4818    dst->f[1] = (float)src->u[1];
4819    dst->f[2] = (float)src->u[2];
4820    dst->f[3] = (float)src->u[3];
4821 }
4822 
4823 static void
micro_uadd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4824 micro_uadd(union tgsi_exec_channel *dst,
4825            const union tgsi_exec_channel *src0,
4826            const union tgsi_exec_channel *src1)
4827 {
4828    dst->u[0] = src0->u[0] + src1->u[0];
4829    dst->u[1] = src0->u[1] + src1->u[1];
4830    dst->u[2] = src0->u[2] + src1->u[2];
4831    dst->u[3] = src0->u[3] + src1->u[3];
4832 }
4833 
4834 static void
micro_udiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4835 micro_udiv(union tgsi_exec_channel *dst,
4836            const union tgsi_exec_channel *src0,
4837            const union tgsi_exec_channel *src1)
4838 {
4839    dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4840    dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4841    dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4842    dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4843 }
4844 
4845 static void
micro_umad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4846 micro_umad(union tgsi_exec_channel *dst,
4847            const union tgsi_exec_channel *src0,
4848            const union tgsi_exec_channel *src1,
4849            const union tgsi_exec_channel *src2)
4850 {
4851    dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4852    dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4853    dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4854    dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4855 }
4856 
4857 static void
micro_umax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4858 micro_umax(union tgsi_exec_channel *dst,
4859            const union tgsi_exec_channel *src0,
4860            const union tgsi_exec_channel *src1)
4861 {
4862    dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4863    dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4864    dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4865    dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4866 }
4867 
4868 static void
micro_umin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4869 micro_umin(union tgsi_exec_channel *dst,
4870            const union tgsi_exec_channel *src0,
4871            const union tgsi_exec_channel *src1)
4872 {
4873    dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4874    dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4875    dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4876    dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4877 }
4878 
4879 static void
micro_umod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4880 micro_umod(union tgsi_exec_channel *dst,
4881            const union tgsi_exec_channel *src0,
4882            const union tgsi_exec_channel *src1)
4883 {
4884    dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4885    dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4886    dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4887    dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4888 }
4889 
4890 static void
micro_umul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4891 micro_umul(union tgsi_exec_channel *dst,
4892            const union tgsi_exec_channel *src0,
4893            const union tgsi_exec_channel *src1)
4894 {
4895    dst->u[0] = src0->u[0] * src1->u[0];
4896    dst->u[1] = src0->u[1] * src1->u[1];
4897    dst->u[2] = src0->u[2] * src1->u[2];
4898    dst->u[3] = src0->u[3] * src1->u[3];
4899 }
4900 
4901 static void
micro_imul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4902 micro_imul_hi(union tgsi_exec_channel *dst,
4903               const union tgsi_exec_channel *src0,
4904               const union tgsi_exec_channel *src1)
4905 {
4906 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4907    dst->i[0] = I64M(src0->i[0], src1->i[0]);
4908    dst->i[1] = I64M(src0->i[1], src1->i[1]);
4909    dst->i[2] = I64M(src0->i[2], src1->i[2]);
4910    dst->i[3] = I64M(src0->i[3], src1->i[3]);
4911 #undef I64M
4912 }
4913 
4914 static void
micro_umul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4915 micro_umul_hi(union tgsi_exec_channel *dst,
4916               const union tgsi_exec_channel *src0,
4917               const union tgsi_exec_channel *src1)
4918 {
4919 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4920    dst->u[0] = U64M(src0->u[0], src1->u[0]);
4921    dst->u[1] = U64M(src0->u[1], src1->u[1]);
4922    dst->u[2] = U64M(src0->u[2], src1->u[2]);
4923    dst->u[3] = U64M(src0->u[3], src1->u[3]);
4924 #undef U64M
4925 }
4926 
4927 static void
micro_useq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4928 micro_useq(union tgsi_exec_channel *dst,
4929            const union tgsi_exec_channel *src0,
4930            const union tgsi_exec_channel *src1)
4931 {
4932    dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4933    dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4934    dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4935    dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4936 }
4937 
4938 static void
micro_usge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4939 micro_usge(union tgsi_exec_channel *dst,
4940            const union tgsi_exec_channel *src0,
4941            const union tgsi_exec_channel *src1)
4942 {
4943    dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4944    dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4945    dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4946    dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4947 }
4948 
4949 static void
micro_ushr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4950 micro_ushr(union tgsi_exec_channel *dst,
4951            const union tgsi_exec_channel *src0,
4952            const union tgsi_exec_channel *src1)
4953 {
4954    unsigned masked_count;
4955    masked_count = src1->u[0] & 0x1f;
4956    dst->u[0] = src0->u[0] >> masked_count;
4957    masked_count = src1->u[1] & 0x1f;
4958    dst->u[1] = src0->u[1] >> masked_count;
4959    masked_count = src1->u[2] & 0x1f;
4960    dst->u[2] = src0->u[2] >> masked_count;
4961    masked_count = src1->u[3] & 0x1f;
4962    dst->u[3] = src0->u[3] >> masked_count;
4963 }
4964 
4965 static void
micro_uslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4966 micro_uslt(union tgsi_exec_channel *dst,
4967            const union tgsi_exec_channel *src0,
4968            const union tgsi_exec_channel *src1)
4969 {
4970    dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4971    dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4972    dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4973    dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4974 }
4975 
4976 static void
micro_usne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4977 micro_usne(union tgsi_exec_channel *dst,
4978            const union tgsi_exec_channel *src0,
4979            const union tgsi_exec_channel *src1)
4980 {
4981    dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4982    dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4983    dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4984    dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
4985 }
4986 
4987 static void
micro_uarl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4988 micro_uarl(union tgsi_exec_channel *dst,
4989            const union tgsi_exec_channel *src)
4990 {
4991    dst->i[0] = src->u[0];
4992    dst->i[1] = src->u[1];
4993    dst->i[2] = src->u[2];
4994    dst->i[3] = src->u[3];
4995 }
4996 
4997 /**
4998  * Signed bitfield extract (i.e. sign-extend the extracted bits)
4999  */
5000 static void
micro_ibfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)5001 micro_ibfe(union tgsi_exec_channel *dst,
5002            const union tgsi_exec_channel *src0,
5003            const union tgsi_exec_channel *src1,
5004            const union tgsi_exec_channel *src2)
5005 {
5006    int i;
5007    for (i = 0; i < 4; i++) {
5008       int width = src2->i[i];
5009       int offset = src1->i[i] & 0x1f;
5010       if (width == 32 && offset == 0) {
5011          dst->i[i] = src0->i[i];
5012          continue;
5013       }
5014       width &= 0x1f;
5015       if (width == 0)
5016          dst->i[i] = 0;
5017       else if (width + offset < 32)
5018          dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
5019       else
5020          dst->i[i] = src0->i[i] >> offset;
5021    }
5022 }
5023 
5024 /**
5025  * Unsigned bitfield extract
5026  */
5027 static void
micro_ubfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)5028 micro_ubfe(union tgsi_exec_channel *dst,
5029            const union tgsi_exec_channel *src0,
5030            const union tgsi_exec_channel *src1,
5031            const union tgsi_exec_channel *src2)
5032 {
5033    int i;
5034    for (i = 0; i < 4; i++) {
5035       int width = src2->u[i];
5036       int offset = src1->u[i] & 0x1f;
5037       if (width == 32 && offset == 0) {
5038          dst->u[i] = src0->u[i];
5039          continue;
5040       }
5041       width &= 0x1f;
5042       if (width == 0)
5043          dst->u[i] = 0;
5044       else if (width + offset < 32)
5045          dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
5046       else
5047          dst->u[i] = src0->u[i] >> offset;
5048    }
5049 }
5050 
5051 /**
5052  * Bitfield insert: copy low bits from src1 into a region of src0.
5053  */
5054 static void
micro_bfi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)5055 micro_bfi(union tgsi_exec_channel *dst,
5056           const union tgsi_exec_channel *src0,
5057           const union tgsi_exec_channel *src1,
5058           const union tgsi_exec_channel *src2,
5059           const union tgsi_exec_channel *src3)
5060 {
5061    int i;
5062    for (i = 0; i < 4; i++) {
5063       int width = src3->u[i];
5064       int offset = src2->u[i] & 0x1f;
5065       if (width == 32) {
5066          dst->u[i] = src1->u[i];
5067       } else {
5068          int bitmask = ((1 << width) - 1) << offset;
5069          dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
5070       }
5071    }
5072 }
5073 
5074 static void
micro_brev(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5075 micro_brev(union tgsi_exec_channel *dst,
5076            const union tgsi_exec_channel *src)
5077 {
5078    dst->u[0] = util_bitreverse(src->u[0]);
5079    dst->u[1] = util_bitreverse(src->u[1]);
5080    dst->u[2] = util_bitreverse(src->u[2]);
5081    dst->u[3] = util_bitreverse(src->u[3]);
5082 }
5083 
5084 static void
micro_popc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5085 micro_popc(union tgsi_exec_channel *dst,
5086            const union tgsi_exec_channel *src)
5087 {
5088    dst->u[0] = util_bitcount(src->u[0]);
5089    dst->u[1] = util_bitcount(src->u[1]);
5090    dst->u[2] = util_bitcount(src->u[2]);
5091    dst->u[3] = util_bitcount(src->u[3]);
5092 }
5093 
5094 static void
micro_lsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5095 micro_lsb(union tgsi_exec_channel *dst,
5096           const union tgsi_exec_channel *src)
5097 {
5098    dst->i[0] = ffs(src->u[0]) - 1;
5099    dst->i[1] = ffs(src->u[1]) - 1;
5100    dst->i[2] = ffs(src->u[2]) - 1;
5101    dst->i[3] = ffs(src->u[3]) - 1;
5102 }
5103 
5104 static void
micro_imsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5105 micro_imsb(union tgsi_exec_channel *dst,
5106            const union tgsi_exec_channel *src)
5107 {
5108    dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
5109    dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
5110    dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
5111    dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
5112 }
5113 
5114 static void
micro_umsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5115 micro_umsb(union tgsi_exec_channel *dst,
5116            const union tgsi_exec_channel *src)
5117 {
5118    dst->i[0] = util_last_bit(src->u[0]) - 1;
5119    dst->i[1] = util_last_bit(src->u[1]) - 1;
5120    dst->i[2] = util_last_bit(src->u[2]) - 1;
5121    dst->i[3] = util_last_bit(src->u[3]) - 1;
5122 }
5123 
5124 
5125 static void
exec_interp_at_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)5126 exec_interp_at_sample(struct tgsi_exec_machine *mach,
5127                       const struct tgsi_full_instruction *inst)
5128 {
5129    union tgsi_exec_channel index;
5130    union tgsi_exec_channel index2D;
5131    union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
5132    const struct tgsi_full_src_register *reg = &inst->Src[0];
5133 
5134    assert(reg->Register.File == TGSI_FILE_INPUT);
5135    assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE);
5136 
5137    get_index_registers(mach, reg, &index, &index2D);
5138    float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX];
5139 
5140    /* Short cut: sample 0 is like a normal fetch */
5141    for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
5142       if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
5143          continue;
5144 
5145       fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
5146                              &result[chan]);
5147       if (sample != 0.0f) {
5148 
5149       /* TODO: define the samples > 0, but so far we only do fake MSAA */
5150          float x = 0;
5151          float y = 0;
5152 
5153          unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan];
5154          assert(pos >= 0);
5155          assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
5156          mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]);
5157       }
5158       store_dest(mach, &result[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
5159    }
5160 }
5161 
5162 
5163 static void
exec_interp_at_offset(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)5164 exec_interp_at_offset(struct tgsi_exec_machine *mach,
5165                       const struct tgsi_full_instruction *inst)
5166 {
5167    union tgsi_exec_channel index;
5168    union tgsi_exec_channel index2D;
5169    union tgsi_exec_channel ofsx;
5170    union tgsi_exec_channel ofsy;
5171    const struct tgsi_full_src_register *reg = &inst->Src[0];
5172 
5173    assert(reg->Register.File == TGSI_FILE_INPUT);
5174 
5175    get_index_registers(mach, reg, &index, &index2D);
5176    unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0];
5177 
5178    fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
5179    fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
5180 
5181    for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
5182       if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
5183          continue;
5184       union tgsi_exec_channel result;
5185       fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result);
5186       mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result);
5187       store_dest(mach, &result, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
5188    }
5189 }
5190 
5191 
5192 static void
exec_interp_at_centroid(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)5193 exec_interp_at_centroid(struct tgsi_exec_machine *mach,
5194                         const struct tgsi_full_instruction *inst)
5195 {
5196    union tgsi_exec_channel index;
5197    union tgsi_exec_channel index2D;
5198    union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
5199    const struct tgsi_full_src_register *reg = &inst->Src[0];
5200 
5201    assert(reg->Register.File == TGSI_FILE_INPUT);
5202    get_index_registers(mach, reg, &index, &index2D);
5203 
5204    for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
5205       if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
5206          continue;
5207 
5208       /* Here we should add the change to use a sample that lies within the
5209        * primitive (Section 15.2):
5210        *
5211        * "When interpolating variables declared using centroid in ,
5212        * the variable is sampled at a location within the pixel covered
5213        * by the primitive generating the fragment.
5214        * ...
5215        * The built-in functions interpolateAtCentroid ... will sample
5216        * variables as though they were declared with the centroid ...
5217        * qualifier[s]."
5218        *
5219        * Since we only support 1 sample currently, this is just a pass-through.
5220        */
5221       fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
5222                              &result[chan]);
5223       store_dest(mach, &result[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
5224    }
5225 
5226 }
5227 
5228 
5229 /**
5230  * Execute a TGSI instruction.
5231  * Returns TRUE if a barrier instruction is hit,
5232  * otherwise FALSE.
5233  */
5234 static boolean
exec_instruction(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int * pc)5235 exec_instruction(
5236    struct tgsi_exec_machine *mach,
5237    const struct tgsi_full_instruction *inst,
5238    int *pc )
5239 {
5240    union tgsi_exec_channel r[10];
5241 
5242    (*pc)++;
5243 
5244    switch (inst->Instruction.Opcode) {
5245    case TGSI_OPCODE_ARL:
5246       exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5247       break;
5248 
5249    case TGSI_OPCODE_MOV:
5250       exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5251       break;
5252 
5253    case TGSI_OPCODE_LIT:
5254       exec_lit(mach, inst);
5255       break;
5256 
5257    case TGSI_OPCODE_RCP:
5258       exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5259       break;
5260 
5261    case TGSI_OPCODE_RSQ:
5262       exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5263       break;
5264 
5265    case TGSI_OPCODE_EXP:
5266       exec_exp(mach, inst);
5267       break;
5268 
5269    case TGSI_OPCODE_LOG:
5270       exec_log(mach, inst);
5271       break;
5272 
5273    case TGSI_OPCODE_MUL:
5274       exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5275       break;
5276 
5277    case TGSI_OPCODE_ADD:
5278       exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5279       break;
5280 
5281    case TGSI_OPCODE_DP3:
5282       exec_dp3(mach, inst);
5283       break;
5284 
5285    case TGSI_OPCODE_DP4:
5286       exec_dp4(mach, inst);
5287       break;
5288 
5289    case TGSI_OPCODE_DST:
5290       exec_dst(mach, inst);
5291       break;
5292 
5293    case TGSI_OPCODE_MIN:
5294       exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5295       break;
5296 
5297    case TGSI_OPCODE_MAX:
5298       exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5299       break;
5300 
5301    case TGSI_OPCODE_SLT:
5302       exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5303       break;
5304 
5305    case TGSI_OPCODE_SGE:
5306       exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5307       break;
5308 
5309    case TGSI_OPCODE_MAD:
5310       exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5311       break;
5312 
5313    case TGSI_OPCODE_LRP:
5314       exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5315       break;
5316 
5317    case TGSI_OPCODE_SQRT:
5318       exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5319       break;
5320 
5321    case TGSI_OPCODE_FRC:
5322       exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5323       break;
5324 
5325    case TGSI_OPCODE_FLR:
5326       exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5327       break;
5328 
5329    case TGSI_OPCODE_ROUND:
5330       exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5331       break;
5332 
5333    case TGSI_OPCODE_EX2:
5334       exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5335       break;
5336 
5337    case TGSI_OPCODE_LG2:
5338       exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5339       break;
5340 
5341    case TGSI_OPCODE_POW:
5342       exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5343       break;
5344 
5345    case TGSI_OPCODE_LDEXP:
5346       exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5347       break;
5348 
5349    case TGSI_OPCODE_COS:
5350       exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5351       break;
5352 
5353    case TGSI_OPCODE_DDX_FINE:
5354       exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5355       break;
5356 
5357    case TGSI_OPCODE_DDX:
5358       exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5359       break;
5360 
5361    case TGSI_OPCODE_DDY_FINE:
5362       exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5363       break;
5364 
5365    case TGSI_OPCODE_DDY:
5366       exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5367       break;
5368 
5369    case TGSI_OPCODE_KILL:
5370       exec_kill (mach);
5371       break;
5372 
5373    case TGSI_OPCODE_KILL_IF:
5374       exec_kill_if (mach, inst);
5375       break;
5376 
5377    case TGSI_OPCODE_PK2H:
5378       exec_pk2h(mach, inst);
5379       break;
5380 
5381    case TGSI_OPCODE_PK2US:
5382       assert (0);
5383       break;
5384 
5385    case TGSI_OPCODE_PK4B:
5386       assert (0);
5387       break;
5388 
5389    case TGSI_OPCODE_PK4UB:
5390       assert (0);
5391       break;
5392 
5393    case TGSI_OPCODE_SEQ:
5394       exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5395       break;
5396 
5397    case TGSI_OPCODE_SGT:
5398       exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5399       break;
5400 
5401    case TGSI_OPCODE_SIN:
5402       exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5403       break;
5404 
5405    case TGSI_OPCODE_SLE:
5406       exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5407       break;
5408 
5409    case TGSI_OPCODE_SNE:
5410       exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5411       break;
5412 
5413    case TGSI_OPCODE_TEX:
5414       /* simple texture lookup */
5415       /* src[0] = texcoord */
5416       /* src[1] = sampler unit */
5417       exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
5418       break;
5419 
5420    case TGSI_OPCODE_TXB:
5421       /* Texture lookup with lod bias */
5422       /* src[0] = texcoord (src[0].w = LOD bias) */
5423       /* src[1] = sampler unit */
5424       exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
5425       break;
5426 
5427    case TGSI_OPCODE_TXD:
5428       /* Texture lookup with explict partial derivatives */
5429       /* src[0] = texcoord */
5430       /* src[1] = d[strq]/dx */
5431       /* src[2] = d[strq]/dy */
5432       /* src[3] = sampler unit */
5433       exec_txd(mach, inst);
5434       break;
5435 
5436    case TGSI_OPCODE_TXL:
5437       /* Texture lookup with explit LOD */
5438       /* src[0] = texcoord (src[0].w = LOD) */
5439       /* src[1] = sampler unit */
5440       exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
5441       break;
5442 
5443    case TGSI_OPCODE_TXP:
5444       /* Texture lookup with projection */
5445       /* src[0] = texcoord (src[0].w = projection) */
5446       /* src[1] = sampler unit */
5447       exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
5448       break;
5449 
5450    case TGSI_OPCODE_TG4:
5451       /* src[0] = texcoord */
5452       /* src[1] = component */
5453       /* src[2] = sampler unit */
5454       exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
5455       break;
5456 
5457    case TGSI_OPCODE_LODQ:
5458       /* src[0] = texcoord */
5459       /* src[1] = sampler unit */
5460       exec_lodq(mach, inst);
5461       break;
5462 
5463    case TGSI_OPCODE_UP2H:
5464       exec_up2h(mach, inst);
5465       break;
5466 
5467    case TGSI_OPCODE_UP2US:
5468       assert (0);
5469       break;
5470 
5471    case TGSI_OPCODE_UP4B:
5472       assert (0);
5473       break;
5474 
5475    case TGSI_OPCODE_UP4UB:
5476       assert (0);
5477       break;
5478 
5479    case TGSI_OPCODE_ARR:
5480       exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5481       break;
5482 
5483    case TGSI_OPCODE_CAL:
5484       /* skip the call if no execution channels are enabled */
5485       if (mach->ExecMask) {
5486          /* do the call */
5487 
5488          /* First, record the depths of the execution stacks.
5489           * This is important for deeply nested/looped return statements.
5490           * We have to unwind the stacks by the correct amount.  For a
5491           * real code generator, we could determine the number of entries
5492           * to pop off each stack with simple static analysis and avoid
5493           * implementing this data structure at run time.
5494           */
5495          mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
5496          mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
5497          mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5498          mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5499          mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
5500          /* note that PC was already incremented above */
5501          mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
5502 
5503          mach->CallStackTop++;
5504 
5505          /* Second, push the Cond, Loop, Cont, Func stacks */
5506          assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5507          assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5508          assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5509          assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5510          assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5511          assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5512 
5513          mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5514          mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5515          mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5516          mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5517          mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5518          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
5519 
5520          /* Finally, jump to the subroutine.  The label is a pointer
5521           * (an instruction number) to the BGNSUB instruction.
5522           */
5523          *pc = inst->Label.Label;
5524          assert(mach->Instructions[*pc].Instruction.Opcode
5525                 == TGSI_OPCODE_BGNSUB);
5526       }
5527       break;
5528 
5529    case TGSI_OPCODE_RET:
5530       mach->FuncMask &= ~mach->ExecMask;
5531       UPDATE_EXEC_MASK(mach);
5532 
5533       if (mach->FuncMask == 0x0) {
5534          /* really return now (otherwise, keep executing */
5535 
5536          if (mach->CallStackTop == 0) {
5537             /* returning from main() */
5538             mach->CondStackTop = 0;
5539             mach->LoopStackTop = 0;
5540             mach->ContStackTop = 0;
5541             mach->LoopLabelStackTop = 0;
5542             mach->SwitchStackTop = 0;
5543             mach->BreakStackTop = 0;
5544             *pc = -1;
5545             return FALSE;
5546          }
5547 
5548          assert(mach->CallStackTop > 0);
5549          mach->CallStackTop--;
5550 
5551          mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5552          mach->CondMask = mach->CondStack[mach->CondStackTop];
5553 
5554          mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5555          mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5556 
5557          mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5558          mach->ContMask = mach->ContStack[mach->ContStackTop];
5559 
5560          mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5561          mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5562 
5563          mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5564          mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5565 
5566          assert(mach->FuncStackTop > 0);
5567          mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5568 
5569          *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5570 
5571          UPDATE_EXEC_MASK(mach);
5572       }
5573       break;
5574 
5575    case TGSI_OPCODE_SSG:
5576       exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5577       break;
5578 
5579    case TGSI_OPCODE_CMP:
5580       exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5581       break;
5582 
5583    case TGSI_OPCODE_DIV:
5584       exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5585       break;
5586 
5587    case TGSI_OPCODE_DP2:
5588       exec_dp2(mach, inst);
5589       break;
5590 
5591    case TGSI_OPCODE_IF:
5592       /* push CondMask */
5593       assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5594       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5595       FETCH( &r[0], 0, TGSI_CHAN_X );
5596       /* update CondMask */
5597       if( ! r[0].f[0] ) {
5598          mach->CondMask &= ~0x1;
5599       }
5600       if( ! r[0].f[1] ) {
5601          mach->CondMask &= ~0x2;
5602       }
5603       if( ! r[0].f[2] ) {
5604          mach->CondMask &= ~0x4;
5605       }
5606       if( ! r[0].f[3] ) {
5607          mach->CondMask &= ~0x8;
5608       }
5609       UPDATE_EXEC_MASK(mach);
5610       /* Todo: If CondMask==0, jump to ELSE */
5611       break;
5612 
5613    case TGSI_OPCODE_UIF:
5614       /* push CondMask */
5615       assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5616       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5617       IFETCH( &r[0], 0, TGSI_CHAN_X );
5618       /* update CondMask */
5619       if( ! r[0].u[0] ) {
5620          mach->CondMask &= ~0x1;
5621       }
5622       if( ! r[0].u[1] ) {
5623          mach->CondMask &= ~0x2;
5624       }
5625       if( ! r[0].u[2] ) {
5626          mach->CondMask &= ~0x4;
5627       }
5628       if( ! r[0].u[3] ) {
5629          mach->CondMask &= ~0x8;
5630       }
5631       UPDATE_EXEC_MASK(mach);
5632       /* Todo: If CondMask==0, jump to ELSE */
5633       break;
5634 
5635    case TGSI_OPCODE_ELSE:
5636       /* invert CondMask wrt previous mask */
5637       {
5638          uint prevMask;
5639          assert(mach->CondStackTop > 0);
5640          prevMask = mach->CondStack[mach->CondStackTop - 1];
5641          mach->CondMask = ~mach->CondMask & prevMask;
5642          UPDATE_EXEC_MASK(mach);
5643          /* Todo: If CondMask==0, jump to ENDIF */
5644       }
5645       break;
5646 
5647    case TGSI_OPCODE_ENDIF:
5648       /* pop CondMask */
5649       assert(mach->CondStackTop > 0);
5650       mach->CondMask = mach->CondStack[--mach->CondStackTop];
5651       UPDATE_EXEC_MASK(mach);
5652       break;
5653 
5654    case TGSI_OPCODE_END:
5655       /* make sure we end primitives which haven't
5656        * been explicitly emitted */
5657       conditional_emit_primitive(mach);
5658       /* halt execution */
5659       *pc = -1;
5660       break;
5661 
5662    case TGSI_OPCODE_CEIL:
5663       exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5664       break;
5665 
5666    case TGSI_OPCODE_I2F:
5667       exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
5668       break;
5669 
5670    case TGSI_OPCODE_NOT:
5671       exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5672       break;
5673 
5674    case TGSI_OPCODE_TRUNC:
5675       exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5676       break;
5677 
5678    case TGSI_OPCODE_SHL:
5679       exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5680       break;
5681 
5682    case TGSI_OPCODE_AND:
5683       exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5684       break;
5685 
5686    case TGSI_OPCODE_OR:
5687       exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5688       break;
5689 
5690    case TGSI_OPCODE_MOD:
5691       exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5692       break;
5693 
5694    case TGSI_OPCODE_XOR:
5695       exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5696       break;
5697 
5698    case TGSI_OPCODE_TXF:
5699       exec_txf(mach, inst);
5700       break;
5701 
5702    case TGSI_OPCODE_TXQ:
5703       exec_txq(mach, inst);
5704       break;
5705 
5706    case TGSI_OPCODE_EMIT:
5707       emit_vertex(mach, inst);
5708       break;
5709 
5710    case TGSI_OPCODE_ENDPRIM:
5711       emit_primitive(mach, inst);
5712       break;
5713 
5714    case TGSI_OPCODE_BGNLOOP:
5715       /* push LoopMask and ContMasks */
5716       assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5717       assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5718       assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5719       assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5720 
5721       mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5722       mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5723       mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5724       mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5725       mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
5726       break;
5727 
5728    case TGSI_OPCODE_ENDLOOP:
5729       /* Restore ContMask, but don't pop */
5730       assert(mach->ContStackTop > 0);
5731       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
5732       UPDATE_EXEC_MASK(mach);
5733       if (mach->ExecMask) {
5734          /* repeat loop: jump to instruction just past BGNLOOP */
5735          assert(mach->LoopLabelStackTop > 0);
5736          *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
5737       }
5738       else {
5739          /* exit loop: pop LoopMask */
5740          assert(mach->LoopStackTop > 0);
5741          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
5742          /* pop ContMask */
5743          assert(mach->ContStackTop > 0);
5744          mach->ContMask = mach->ContStack[--mach->ContStackTop];
5745          assert(mach->LoopLabelStackTop > 0);
5746          --mach->LoopLabelStackTop;
5747 
5748          mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
5749       }
5750       UPDATE_EXEC_MASK(mach);
5751       break;
5752 
5753    case TGSI_OPCODE_BRK:
5754       exec_break(mach);
5755       break;
5756 
5757    case TGSI_OPCODE_CONT:
5758       /* turn off cont channels for each enabled exec channel */
5759       mach->ContMask &= ~mach->ExecMask;
5760       /* Todo: if mach->LoopMask == 0, jump to end of loop */
5761       UPDATE_EXEC_MASK(mach);
5762       break;
5763 
5764    case TGSI_OPCODE_BGNSUB:
5765       /* no-op */
5766       break;
5767 
5768    case TGSI_OPCODE_ENDSUB:
5769       /*
5770        * XXX: This really should be a no-op. We should never reach this opcode.
5771        */
5772 
5773       assert(mach->CallStackTop > 0);
5774       mach->CallStackTop--;
5775 
5776       mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5777       mach->CondMask = mach->CondStack[mach->CondStackTop];
5778 
5779       mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5780       mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5781 
5782       mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5783       mach->ContMask = mach->ContStack[mach->ContStackTop];
5784 
5785       mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5786       mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5787 
5788       mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5789       mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5790 
5791       assert(mach->FuncStackTop > 0);
5792       mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5793 
5794       *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5795 
5796       UPDATE_EXEC_MASK(mach);
5797       break;
5798 
5799    case TGSI_OPCODE_NOP:
5800       break;
5801 
5802    case TGSI_OPCODE_F2I:
5803       exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5804       break;
5805 
5806    case TGSI_OPCODE_FSEQ:
5807       exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5808       break;
5809 
5810    case TGSI_OPCODE_FSGE:
5811       exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5812       break;
5813 
5814    case TGSI_OPCODE_FSLT:
5815       exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5816       break;
5817 
5818    case TGSI_OPCODE_FSNE:
5819       exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5820       break;
5821 
5822    case TGSI_OPCODE_IDIV:
5823       exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5824       break;
5825 
5826    case TGSI_OPCODE_IMAX:
5827       exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5828       break;
5829 
5830    case TGSI_OPCODE_IMIN:
5831       exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5832       break;
5833 
5834    case TGSI_OPCODE_INEG:
5835       exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5836       break;
5837 
5838    case TGSI_OPCODE_ISGE:
5839       exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5840       break;
5841 
5842    case TGSI_OPCODE_ISHR:
5843       exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5844       break;
5845 
5846    case TGSI_OPCODE_ISLT:
5847       exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5848       break;
5849 
5850    case TGSI_OPCODE_F2U:
5851       exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5852       break;
5853 
5854    case TGSI_OPCODE_U2F:
5855       exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
5856       break;
5857 
5858    case TGSI_OPCODE_UADD:
5859       exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5860       break;
5861 
5862    case TGSI_OPCODE_UDIV:
5863       exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5864       break;
5865 
5866    case TGSI_OPCODE_UMAD:
5867       exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5868       break;
5869 
5870    case TGSI_OPCODE_UMAX:
5871       exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5872       break;
5873 
5874    case TGSI_OPCODE_UMIN:
5875       exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5876       break;
5877 
5878    case TGSI_OPCODE_UMOD:
5879       exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5880       break;
5881 
5882    case TGSI_OPCODE_UMUL:
5883       exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5884       break;
5885 
5886    case TGSI_OPCODE_IMUL_HI:
5887       exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5888       break;
5889 
5890    case TGSI_OPCODE_UMUL_HI:
5891       exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5892       break;
5893 
5894    case TGSI_OPCODE_USEQ:
5895       exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5896       break;
5897 
5898    case TGSI_OPCODE_USGE:
5899       exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5900       break;
5901 
5902    case TGSI_OPCODE_USHR:
5903       exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5904       break;
5905 
5906    case TGSI_OPCODE_USLT:
5907       exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5908       break;
5909 
5910    case TGSI_OPCODE_USNE:
5911       exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5912       break;
5913 
5914    case TGSI_OPCODE_SWITCH:
5915       exec_switch(mach, inst);
5916       break;
5917 
5918    case TGSI_OPCODE_CASE:
5919       exec_case(mach, inst);
5920       break;
5921 
5922    case TGSI_OPCODE_DEFAULT:
5923       exec_default(mach);
5924       break;
5925 
5926    case TGSI_OPCODE_ENDSWITCH:
5927       exec_endswitch(mach);
5928       break;
5929 
5930    case TGSI_OPCODE_SAMPLE_I:
5931       exec_txf(mach, inst);
5932       break;
5933 
5934    case TGSI_OPCODE_SAMPLE_I_MS:
5935       exec_txf(mach, inst);
5936       break;
5937 
5938    case TGSI_OPCODE_SAMPLE:
5939       exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
5940       break;
5941 
5942    case TGSI_OPCODE_SAMPLE_B:
5943       exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
5944       break;
5945 
5946    case TGSI_OPCODE_SAMPLE_C:
5947       exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
5948       break;
5949 
5950    case TGSI_OPCODE_SAMPLE_C_LZ:
5951       exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
5952       break;
5953 
5954    case TGSI_OPCODE_SAMPLE_D:
5955       exec_sample_d(mach, inst);
5956       break;
5957 
5958    case TGSI_OPCODE_SAMPLE_L:
5959       exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
5960       break;
5961 
5962    case TGSI_OPCODE_GATHER4:
5963       exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE);
5964       break;
5965 
5966    case TGSI_OPCODE_SVIEWINFO:
5967       exec_txq(mach, inst);
5968       break;
5969 
5970    case TGSI_OPCODE_SAMPLE_POS:
5971       assert(0);
5972       break;
5973 
5974    case TGSI_OPCODE_SAMPLE_INFO:
5975       assert(0);
5976       break;
5977 
5978    case TGSI_OPCODE_LOD:
5979       exec_lodq(mach, inst);
5980       break;
5981 
5982    case TGSI_OPCODE_UARL:
5983       exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5984       break;
5985 
5986    case TGSI_OPCODE_UCMP:
5987       exec_ucmp(mach, inst);
5988       break;
5989 
5990    case TGSI_OPCODE_IABS:
5991       exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5992       break;
5993 
5994    case TGSI_OPCODE_ISSG:
5995       exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5996       break;
5997 
5998    case TGSI_OPCODE_TEX2:
5999       /* simple texture lookup */
6000       /* src[0] = texcoord */
6001       /* src[1] = compare */
6002       /* src[2] = sampler unit */
6003       exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
6004       break;
6005    case TGSI_OPCODE_TXB2:
6006       /* simple texture lookup */
6007       /* src[0] = texcoord */
6008       /* src[1] = bias */
6009       /* src[2] = sampler unit */
6010       exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
6011       break;
6012    case TGSI_OPCODE_TXL2:
6013       /* simple texture lookup */
6014       /* src[0] = texcoord */
6015       /* src[1] = lod */
6016       /* src[2] = sampler unit */
6017       exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
6018       break;
6019 
6020    case TGSI_OPCODE_IBFE:
6021       exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
6022       break;
6023    case TGSI_OPCODE_UBFE:
6024       exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6025       break;
6026    case TGSI_OPCODE_BFI:
6027       exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6028       break;
6029    case TGSI_OPCODE_BREV:
6030       exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6031       break;
6032    case TGSI_OPCODE_POPC:
6033       exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6034       break;
6035    case TGSI_OPCODE_LSB:
6036       exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
6037       break;
6038    case TGSI_OPCODE_IMSB:
6039       exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
6040       break;
6041    case TGSI_OPCODE_UMSB:
6042       exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
6043       break;
6044 
6045    case TGSI_OPCODE_F2D:
6046       exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
6047       break;
6048 
6049    case TGSI_OPCODE_D2F:
6050       exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
6051       break;
6052 
6053    case TGSI_OPCODE_DABS:
6054       exec_double_unary(mach, inst, micro_dabs);
6055       break;
6056 
6057    case TGSI_OPCODE_DNEG:
6058       exec_double_unary(mach, inst, micro_dneg);
6059       break;
6060 
6061    case TGSI_OPCODE_DADD:
6062       exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
6063       break;
6064 
6065    case TGSI_OPCODE_DDIV:
6066       exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
6067       break;
6068 
6069    case TGSI_OPCODE_DMUL:
6070       exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
6071       break;
6072 
6073    case TGSI_OPCODE_DMAX:
6074       exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
6075       break;
6076 
6077    case TGSI_OPCODE_DMIN:
6078       exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
6079       break;
6080 
6081    case TGSI_OPCODE_DSLT:
6082       exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
6083       break;
6084 
6085    case TGSI_OPCODE_DSGE:
6086       exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
6087       break;
6088 
6089    case TGSI_OPCODE_DSEQ:
6090       exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
6091       break;
6092 
6093    case TGSI_OPCODE_DSNE:
6094       exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
6095       break;
6096 
6097    case TGSI_OPCODE_DRCP:
6098       exec_double_unary(mach, inst, micro_drcp);
6099       break;
6100 
6101    case TGSI_OPCODE_DSQRT:
6102       exec_double_unary(mach, inst, micro_dsqrt);
6103       break;
6104 
6105    case TGSI_OPCODE_DRSQ:
6106       exec_double_unary(mach, inst, micro_drsq);
6107       break;
6108 
6109    case TGSI_OPCODE_DMAD:
6110       exec_double_trinary(mach, inst, micro_dmad);
6111       break;
6112 
6113    case TGSI_OPCODE_DFRAC:
6114       exec_double_unary(mach, inst, micro_dfrac);
6115       break;
6116 
6117    case TGSI_OPCODE_DFLR:
6118       exec_double_unary(mach, inst, micro_dflr);
6119       break;
6120 
6121    case TGSI_OPCODE_DLDEXP:
6122       exec_dldexp(mach, inst);
6123       break;
6124 
6125    case TGSI_OPCODE_DFRACEXP:
6126       exec_dfracexp(mach, inst);
6127       break;
6128 
6129    case TGSI_OPCODE_I2D:
6130       exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
6131       break;
6132 
6133    case TGSI_OPCODE_D2I:
6134       exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
6135       break;
6136 
6137    case TGSI_OPCODE_U2D:
6138       exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
6139       break;
6140 
6141    case TGSI_OPCODE_D2U:
6142       exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT);
6143       break;
6144 
6145    case TGSI_OPCODE_LOAD:
6146       exec_load(mach, inst);
6147       break;
6148 
6149    case TGSI_OPCODE_STORE:
6150       exec_store(mach, inst);
6151       break;
6152 
6153    case TGSI_OPCODE_ATOMUADD:
6154    case TGSI_OPCODE_ATOMXCHG:
6155    case TGSI_OPCODE_ATOMCAS:
6156    case TGSI_OPCODE_ATOMAND:
6157    case TGSI_OPCODE_ATOMOR:
6158    case TGSI_OPCODE_ATOMXOR:
6159    case TGSI_OPCODE_ATOMUMIN:
6160    case TGSI_OPCODE_ATOMUMAX:
6161    case TGSI_OPCODE_ATOMIMIN:
6162    case TGSI_OPCODE_ATOMIMAX:
6163    case TGSI_OPCODE_ATOMFADD:
6164       exec_atomop(mach, inst);
6165       break;
6166 
6167    case TGSI_OPCODE_RESQ:
6168       exec_resq(mach, inst);
6169       break;
6170    case TGSI_OPCODE_BARRIER:
6171    case TGSI_OPCODE_MEMBAR:
6172       return TRUE;
6173       break;
6174 
6175    case TGSI_OPCODE_I64ABS:
6176       exec_double_unary(mach, inst, micro_i64abs);
6177       break;
6178 
6179    case TGSI_OPCODE_I64SSG:
6180       exec_double_unary(mach, inst, micro_i64sgn);
6181       break;
6182 
6183    case TGSI_OPCODE_I64NEG:
6184       exec_double_unary(mach, inst, micro_i64neg);
6185       break;
6186 
6187    case TGSI_OPCODE_U64SEQ:
6188       exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
6189       break;
6190 
6191    case TGSI_OPCODE_U64SNE:
6192       exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
6193       break;
6194 
6195    case TGSI_OPCODE_I64SLT:
6196       exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
6197       break;
6198    case TGSI_OPCODE_U64SLT:
6199       exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
6200       break;
6201 
6202    case TGSI_OPCODE_I64SGE:
6203       exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
6204       break;
6205    case TGSI_OPCODE_U64SGE:
6206       exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
6207       break;
6208 
6209    case TGSI_OPCODE_I64MIN:
6210       exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
6211       break;
6212    case TGSI_OPCODE_U64MIN:
6213       exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
6214       break;
6215    case TGSI_OPCODE_I64MAX:
6216       exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
6217       break;
6218    case TGSI_OPCODE_U64MAX:
6219       exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
6220       break;
6221    case TGSI_OPCODE_U64ADD:
6222       exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
6223       break;
6224    case TGSI_OPCODE_U64MUL:
6225       exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
6226       break;
6227    case TGSI_OPCODE_U64SHL:
6228       exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
6229       break;
6230    case TGSI_OPCODE_I64SHR:
6231       exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
6232       break;
6233    case TGSI_OPCODE_U64SHR:
6234       exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
6235       break;
6236    case TGSI_OPCODE_U64DIV:
6237       exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
6238       break;
6239    case TGSI_OPCODE_I64DIV:
6240       exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
6241       break;
6242    case TGSI_OPCODE_U64MOD:
6243       exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
6244       break;
6245    case TGSI_OPCODE_I64MOD:
6246       exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
6247       break;
6248 
6249    case TGSI_OPCODE_F2U64:
6250       exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
6251       break;
6252 
6253    case TGSI_OPCODE_F2I64:
6254       exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
6255       break;
6256 
6257    case TGSI_OPCODE_U2I64:
6258       exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
6259       break;
6260    case TGSI_OPCODE_I2I64:
6261       exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
6262       break;
6263 
6264    case TGSI_OPCODE_D2U64:
6265       exec_double_unary(mach, inst, micro_d2u64);
6266       break;
6267 
6268    case TGSI_OPCODE_D2I64:
6269       exec_double_unary(mach, inst, micro_d2i64);
6270       break;
6271 
6272    case TGSI_OPCODE_U642F:
6273       exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
6274       break;
6275    case TGSI_OPCODE_I642F:
6276       exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
6277       break;
6278 
6279    case TGSI_OPCODE_U642D:
6280       exec_double_unary(mach, inst, micro_u642d);
6281       break;
6282    case TGSI_OPCODE_I642D:
6283       exec_double_unary(mach, inst, micro_i642d);
6284       break;
6285    case TGSI_OPCODE_INTERP_SAMPLE:
6286       exec_interp_at_sample(mach, inst);
6287       break;
6288    case TGSI_OPCODE_INTERP_OFFSET:
6289       exec_interp_at_offset(mach, inst);
6290       break;
6291    case TGSI_OPCODE_INTERP_CENTROID:
6292       exec_interp_at_centroid(mach, inst);
6293       break;
6294    default:
6295       assert( 0 );
6296    }
6297    return FALSE;
6298 }
6299 
6300 static void
tgsi_exec_machine_setup_masks(struct tgsi_exec_machine * mach)6301 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
6302 {
6303    uint default_mask = 0xf;
6304 
6305    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
6306    mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
6307 
6308    if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
6309       for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
6310          mach->Temps[temp_prim_idxs[i].idx].xyzw[temp_prim_idxs[i].chan].u[0] = 0;
6311          mach->Primitives[i][0] = 0;
6312       }
6313       /* GS runs on a single primitive for now */
6314       default_mask = 0x1;
6315    }
6316 
6317    if (mach->NonHelperMask == 0)
6318       mach->NonHelperMask = default_mask;
6319    mach->CondMask = default_mask;
6320    mach->LoopMask = default_mask;
6321    mach->ContMask = default_mask;
6322    mach->FuncMask = default_mask;
6323    mach->ExecMask = default_mask;
6324 
6325    mach->Switch.mask = default_mask;
6326 
6327    assert(mach->CondStackTop == 0);
6328    assert(mach->LoopStackTop == 0);
6329    assert(mach->ContStackTop == 0);
6330    assert(mach->SwitchStackTop == 0);
6331    assert(mach->BreakStackTop == 0);
6332    assert(mach->CallStackTop == 0);
6333 }
6334 
6335 /**
6336  * Run TGSI interpreter.
6337  * \return bitmask of "alive" quad components
6338  */
6339 uint
tgsi_exec_machine_run(struct tgsi_exec_machine * mach,int start_pc)6340 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
6341 {
6342    uint i;
6343 
6344    mach->pc = start_pc;
6345 
6346    if (!start_pc) {
6347       tgsi_exec_machine_setup_masks(mach);
6348 
6349       /* execute declarations (interpolants) */
6350       for (i = 0; i < mach->NumDeclarations; i++) {
6351          exec_declaration( mach, mach->Declarations+i );
6352       }
6353    }
6354 
6355    {
6356 #if DEBUG_EXECUTION
6357       struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
6358       struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6359       uint inst = 1;
6360 
6361       if (!start_pc) {
6362          memset(mach->Temps, 0, sizeof(temps));
6363          if (mach->Outputs)
6364             memset(mach->Outputs, 0, sizeof(outputs));
6365          memset(temps, 0, sizeof(temps));
6366          memset(outputs, 0, sizeof(outputs));
6367       }
6368 #endif
6369 
6370       /* execute instructions, until pc is set to -1 */
6371       while (mach->pc != -1) {
6372          boolean barrier_hit;
6373 #if DEBUG_EXECUTION
6374          uint i;
6375 
6376          tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6377 #endif
6378 
6379          assert(mach->pc < (int) mach->NumInstructions);
6380          barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
6381 
6382          /* for compute shaders if we hit a barrier return now for later rescheduling */
6383          if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
6384             return 0;
6385 
6386 #if DEBUG_EXECUTION
6387          for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
6388             if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6389                uint j;
6390 
6391                memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6392                debug_printf("TEMP[%2u] = ", i);
6393                for (j = 0; j < 4; j++) {
6394                   if (j > 0) {
6395                      debug_printf("           ");
6396                   }
6397                   debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6398                                temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6399                                temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6400                                temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6401                                temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6402                }
6403             }
6404          }
6405          if (mach->Outputs) {
6406             for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
6407                if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
6408                   uint j;
6409 
6410                   memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
6411                   debug_printf("OUT[%2u] =  ", i);
6412                   for (j = 0; j < 4; j++) {
6413                      if (j > 0) {
6414                         debug_printf("           ");
6415                      }
6416                      debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6417                                   outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
6418                                   outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
6419                                   outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
6420                                   outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6421                   }
6422                }
6423             }
6424          }
6425 #endif
6426       }
6427    }
6428 
6429 #if 0
6430    /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6431    if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
6432       /*
6433        * Scale back depth component.
6434        */
6435       for (i = 0; i < 4; i++)
6436          mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
6437    }
6438 #endif
6439 
6440    /* Strictly speaking, these assertions aren't really needed but they
6441     * can potentially catch some bugs in the control flow code.
6442     */
6443    assert(mach->CondStackTop == 0);
6444    assert(mach->LoopStackTop == 0);
6445    assert(mach->ContStackTop == 0);
6446    assert(mach->SwitchStackTop == 0);
6447    assert(mach->BreakStackTop == 0);
6448    assert(mach->CallStackTop == 0);
6449 
6450    return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
6451 }
6452