1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/**
 * @file
 * TGSI to LLVM IR translation -- AoS.
 *
 * FIXME:
 * - No control flow support: the existing control flow code should be factored
 *   out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59 #include "lp_bld_sample.h"
60
61
62 /**
63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64 * ordering.
65 */
66 static LLVMValueRef
swizzle_aos(struct lp_build_tgsi_context * bld_base,LLVMValueRef a,unsigned swizzle_x,unsigned swizzle_y,unsigned swizzle_z,unsigned swizzle_w)67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
68 LLVMValueRef a,
69 unsigned swizzle_x,
70 unsigned swizzle_y,
71 unsigned swizzle_z,
72 unsigned swizzle_w)
73 {
74 unsigned char swizzles[4];
75 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76
77 assert(swizzle_x < 4);
78 assert(swizzle_y < 4);
79 assert(swizzle_z < 4);
80 assert(swizzle_w < 4);
81
82 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86
87 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88 }
89
90
91 static LLVMValueRef
swizzle_scalar_aos(struct lp_build_tgsi_aos_context * bld,LLVMValueRef a,unsigned chan)92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93 LLVMValueRef a,
94 unsigned chan)
95 {
96 chan = bld->swizzles[chan];
97 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
98 }
99
100
101 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)102 emit_fetch_constant(
103 struct lp_build_tgsi_context * bld_base,
104 const struct tgsi_full_src_register * reg,
105 enum tgsi_opcode_type stype,
106 unsigned swizzle)
107 {
108 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110 struct lp_type type = bld_base->base.type;
111 LLVMValueRef res;
112 unsigned chan;
113
114 assert(!reg->Register.Indirect);
115
116 /*
117 * Get the constants components
118 */
119
120 res = bld->bld_base.base.undef;
121 for (chan = 0; chan < 4; ++chan) {
122 LLVMValueRef index;
123 LLVMValueRef scalar_ptr;
124 LLVMValueRef scalar;
125 LLVMValueRef swizzle;
126
127 index = lp_build_const_int32(bld->bld_base.base.gallivm,
128 reg->Register.Index * 4 + chan);
129
130 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131
132 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133
134 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135
136 /*
137 * NOTE: constants array is always assumed to be RGBA
138 */
139
140 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141 bld->swizzles[chan]);
142
143 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144 }
145
146 /*
147 * Broadcast the first quaternion to all others.
148 *
149 * XXX: could be factored into a reusable function.
150 */
151
152 if (type.length > 4) {
153 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154 unsigned i;
155
156 for (chan = 0; chan < 4; ++chan) {
157 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158 }
159
160 for (i = 4; i < type.length; ++i) {
161 shuffles[i] = shuffles[i % 4];
162 }
163
164 res = LLVMBuildShuffleVector(builder,
165 res, bld->bld_base.base.undef,
166 LLVMConstVector(shuffles, type.length),
167 "");
168 }
169 return res;
170 }
171
172 static LLVMValueRef
emit_fetch_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)173 emit_fetch_immediate(
174 struct lp_build_tgsi_context * bld_base,
175 const struct tgsi_full_src_register * reg,
176 enum tgsi_opcode_type stype,
177 unsigned swizzle)
178 {
179 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180 LLVMValueRef res = bld->immediates[reg->Register.Index];
181 assert(res);
182 return res;
183 }
184
185 static LLVMValueRef
emit_fetch_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)186 emit_fetch_input(
187 struct lp_build_tgsi_context * bld_base,
188 const struct tgsi_full_src_register * reg,
189 enum tgsi_opcode_type stype,
190 unsigned swizzle)
191 {
192 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193 LLVMValueRef res = bld->inputs[reg->Register.Index];
194 assert(!reg->Register.Indirect);
195 assert(res);
196 return res;
197 }
198
199 static LLVMValueRef
emit_fetch_temporary(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)200 emit_fetch_temporary(
201 struct lp_build_tgsi_context * bld_base,
202 const struct tgsi_full_src_register * reg,
203 enum tgsi_opcode_type stype,
204 unsigned swizzle)
205 {
206 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210 assert(!reg->Register.Indirect);
211 if (!res)
212 return bld->bld_base.base.undef;
213
214 return res;
215 }
216
217 /**
218 * Register store.
219 */
220 void
lp_emit_store_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,unsigned index,LLVMValueRef value)221 lp_emit_store_aos(
222 struct lp_build_tgsi_aos_context *bld,
223 const struct tgsi_full_instruction *inst,
224 unsigned index,
225 LLVMValueRef value)
226 {
227 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229 LLVMValueRef mask = NULL;
230 LLVMValueRef ptr;
231
232 /*
233 * Saturate the value
234 */
235
236 switch (inst->Instruction.Saturate) {
237 case TGSI_SAT_NONE:
238 break;
239
240 case TGSI_SAT_ZERO_ONE:
241 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
242 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
243 break;
244
245 case TGSI_SAT_MINUS_PLUS_ONE:
246 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
247 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
248 break;
249
250 default:
251 assert(0);
252 }
253
254 /*
255 * Translate the register file
256 */
257
258 assert(!reg->Register.Indirect);
259
260 switch (reg->Register.File) {
261 case TGSI_FILE_OUTPUT:
262 ptr = bld->outputs[reg->Register.Index];
263 break;
264
265 case TGSI_FILE_TEMPORARY:
266 ptr = bld->temps[reg->Register.Index];
267 break;
268
269 case TGSI_FILE_ADDRESS:
270 ptr = bld->addr[reg->Indirect.Index];
271 break;
272
273 case TGSI_FILE_PREDICATE:
274 ptr = bld->preds[reg->Register.Index];
275 break;
276
277 default:
278 assert(0);
279 return;
280 }
281
282 if (!ptr)
283 return;
284 /*
285 * Predicate
286 */
287
288 if (inst->Instruction.Predicate) {
289 LLVMValueRef pred;
290
291 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
292
293 pred = LLVMBuildLoad(builder,
294 bld->preds[inst->Predicate.Index], "");
295
296 /*
297 * Convert the value to an integer mask.
298 */
299 pred = lp_build_compare(bld->bld_base.base.gallivm,
300 bld->bld_base.base.type,
301 PIPE_FUNC_NOTEQUAL,
302 pred,
303 bld->bld_base.base.zero);
304
305 if (inst->Predicate.Negate) {
306 pred = LLVMBuildNot(builder, pred, "");
307 }
308
309 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
310 inst->Predicate.SwizzleX,
311 inst->Predicate.SwizzleY,
312 inst->Predicate.SwizzleZ,
313 inst->Predicate.SwizzleW);
314
315 if (mask) {
316 mask = LLVMBuildAnd(builder, mask, pred, "");
317 } else {
318 mask = pred;
319 }
320 }
321
322 /*
323 * Writemask
324 */
325
326 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
327 LLVMValueRef writemask;
328
329 writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
330 bld->bld_base.base.type,
331 reg->Register.WriteMask,
332 bld->swizzles);
333
334 if (mask) {
335 mask = LLVMBuildAnd(builder, mask, writemask, "");
336 } else {
337 mask = writemask;
338 }
339 }
340
341 if (mask) {
342 LLVMValueRef orig_value;
343
344 orig_value = LLVMBuildLoad(builder, ptr, "");
345 value = lp_build_select(&bld->bld_base.base,
346 mask, value, orig_value);
347 }
348
349 LLVMBuildStore(builder, value, ptr);
350 }
351
352
353 /**
354 * High-level instruction translators.
355 */
356
357 static LLVMValueRef
emit_tex(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)358 emit_tex(struct lp_build_tgsi_aos_context *bld,
359 const struct tgsi_full_instruction *inst,
360 enum lp_build_tex_modifier modifier)
361 {
362 unsigned target;
363 unsigned unit;
364 LLVMValueRef coords;
365 LLVMValueRef ddx;
366 LLVMValueRef ddy;
367 struct lp_derivatives derivs;
368
369 if (!bld->sampler) {
370 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
371 return bld->bld_base.base.undef;
372 }
373
374 target = inst->Texture.Texture;
375
376 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
377
378 if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
379 ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
380 ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
381 unit = inst->Src[3].Register.Index;
382 } else {
383 #if 0
384 ddx = lp_build_ddx( &bld->bld_base.base, coords );
385 ddy = lp_build_ddy( &bld->bld_base.base, coords );
386 #else
387 /* TODO */
388 derivs.ddx_ddy[0] = bld->bld_base.base.one;
389 derivs.ddx_ddy[1] = bld->bld_base.base.one;
390 #endif
391 unit = inst->Src[1].Register.Index;
392 }
393
394 return bld->sampler->emit_fetch_texel(bld->sampler,
395 &bld->bld_base.base,
396 target, unit,
397 coords, derivs,
398 modifier);
399 }
400
401
402 void
lp_emit_declaration_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_declaration * decl)403 lp_emit_declaration_aos(
404 struct lp_build_tgsi_aos_context *bld,
405 const struct tgsi_full_declaration *decl)
406 {
407 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
408 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
409
410 unsigned first = decl->Range.First;
411 unsigned last = decl->Range.Last;
412 unsigned idx;
413
414 for (idx = first; idx <= last; ++idx) {
415 switch (decl->Declaration.File) {
416 case TGSI_FILE_TEMPORARY:
417 assert(idx < LP_MAX_TGSI_TEMPS);
418 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
419 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
420 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
421 vec_type, array_size, "");
422 } else {
423 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
424 }
425 break;
426
427 case TGSI_FILE_OUTPUT:
428 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
429 break;
430
431 case TGSI_FILE_ADDRESS:
432 assert(idx < LP_MAX_TGSI_ADDRS);
433 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
434 break;
435
436 case TGSI_FILE_PREDICATE:
437 assert(idx < LP_MAX_TGSI_PREDS);
438 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
439 break;
440
441 default:
442 /* don't need to declare other vars */
443 break;
444 }
445 }
446 }
447
448
449 /**
450 * Emit LLVM for one TGSI instruction.
451 * \param return TRUE for success, FALSE otherwise
452 */
453 boolean
lp_emit_instruction_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,int * pc)454 lp_emit_instruction_aos(
455 struct lp_build_tgsi_aos_context *bld,
456 const struct tgsi_full_instruction *inst,
457 const struct tgsi_opcode_info *info,
458 int *pc)
459 {
460 LLVMValueRef src0, src1, src2;
461 LLVMValueRef tmp0, tmp1;
462 LLVMValueRef dst0 = NULL;
463
464 /*
465 * Stores and write masks are handled in a general fashion after the long
466 * instruction opcode switch statement.
467 *
468 * Although not stricitly necessary, we avoid generating instructions for
469 * channels which won't be stored, in cases where's that easy. For some
470 * complex instructions, like texture sampling, it is more convenient to
471 * assume a full writemask and then let LLVM optimization passes eliminate
472 * redundant code.
473 */
474
475 (*pc)++;
476
477 assert(info->num_dst <= 1);
478 if (info->num_dst) {
479 dst0 = bld->bld_base.base.undef;
480 }
481
482 switch (inst->Instruction.Opcode) {
483 case TGSI_OPCODE_ARL:
484 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
485 dst0 = lp_build_floor(&bld->bld_base.base, src0);
486 break;
487
488 case TGSI_OPCODE_MOV:
489 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
490 break;
491
492 case TGSI_OPCODE_LIT:
493 return FALSE;
494
495 case TGSI_OPCODE_RCP:
496 /* TGSI_OPCODE_RECIP */
497 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
498 dst0 = lp_build_rcp(&bld->bld_base.base, src0);
499 break;
500
501 case TGSI_OPCODE_RSQ:
502 /* TGSI_OPCODE_RECIPSQRT */
503 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
504 tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
505 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
506 break;
507
508 case TGSI_OPCODE_EXP:
509 return FALSE;
510
511 case TGSI_OPCODE_LOG:
512 return FALSE;
513
514 case TGSI_OPCODE_MUL:
515 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
516 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
517 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
518 break;
519
520 case TGSI_OPCODE_ADD:
521 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
522 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
523 dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
524 break;
525
526 case TGSI_OPCODE_DP3:
527 /* TGSI_OPCODE_DOT3 */
528 return FALSE;
529
530 case TGSI_OPCODE_DP4:
531 /* TGSI_OPCODE_DOT4 */
532 return FALSE;
533
534 case TGSI_OPCODE_DST:
535 return FALSE;
536
537 case TGSI_OPCODE_MIN:
538 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
539 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
540 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
541 break;
542
543 case TGSI_OPCODE_MAX:
544 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
545 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
546 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
547 break;
548
549 case TGSI_OPCODE_SLT:
550 /* TGSI_OPCODE_SETLT */
551 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
552 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
553 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
554 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
555 break;
556
557 case TGSI_OPCODE_SGE:
558 /* TGSI_OPCODE_SETGE */
559 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
560 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
561 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
562 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
563 break;
564
565 case TGSI_OPCODE_MAD:
566 /* TGSI_OPCODE_MADD */
567 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
568 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
569 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
570 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
571 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
572 break;
573
574 case TGSI_OPCODE_SUB:
575 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
576 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
577 dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
578 break;
579
580 case TGSI_OPCODE_LRP:
581 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
582 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
583 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
584 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
585 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
586 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
587 break;
588
589 case TGSI_OPCODE_CND:
590 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
591 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
592 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
593 tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
594 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
595 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
596 break;
597
598 case TGSI_OPCODE_DP2A:
599 return FALSE;
600
601 case TGSI_OPCODE_FRC:
602 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
603 tmp0 = lp_build_floor(&bld->bld_base.base, src0);
604 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
605 break;
606
607 case TGSI_OPCODE_CLAMP:
608 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
609 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
610 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
611 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
612 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
613 break;
614
615 case TGSI_OPCODE_FLR:
616 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
617 dst0 = lp_build_floor(&bld->bld_base.base, src0);
618 break;
619
620 case TGSI_OPCODE_ROUND:
621 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
622 dst0 = lp_build_round(&bld->bld_base.base, src0);
623 break;
624
625 case TGSI_OPCODE_EX2:
626 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
627 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
628 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
629 break;
630
631 case TGSI_OPCODE_LG2:
632 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
633 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
634 dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
635 break;
636
637 case TGSI_OPCODE_POW:
638 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
639 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
640 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
641 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
642 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
643 break;
644
645 case TGSI_OPCODE_XPD:
646 return FALSE;
647
648 case TGSI_OPCODE_RCC:
649 /* deprecated? */
650 assert(0);
651 return FALSE;
652
653 case TGSI_OPCODE_DPH:
654 return FALSE;
655
656 case TGSI_OPCODE_COS:
657 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
658 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
659 dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
660 break;
661
662 case TGSI_OPCODE_DDX:
663 return FALSE;
664
665 case TGSI_OPCODE_DDY:
666 return FALSE;
667
668 case TGSI_OPCODE_KILP:
669 /* predicated kill */
670 return FALSE;
671
672 case TGSI_OPCODE_KIL:
673 /* conditional kill */
674 return FALSE;
675
676 case TGSI_OPCODE_PK2H:
677 return FALSE;
678 break;
679
680 case TGSI_OPCODE_PK2US:
681 return FALSE;
682 break;
683
684 case TGSI_OPCODE_PK4B:
685 return FALSE;
686 break;
687
688 case TGSI_OPCODE_PK4UB:
689 return FALSE;
690
691 case TGSI_OPCODE_RFL:
692 return FALSE;
693
694 case TGSI_OPCODE_SEQ:
695 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
696 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
697 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
698 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
699 break;
700
701 case TGSI_OPCODE_SFL:
702 dst0 = bld->bld_base.base.zero;
703 break;
704
705 case TGSI_OPCODE_SGT:
706 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
707 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
708 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
709 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
710 break;
711
712 case TGSI_OPCODE_SIN:
713 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
714 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
715 dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
716 break;
717
718 case TGSI_OPCODE_SLE:
719 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
720 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
721 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
722 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
723 break;
724
725 case TGSI_OPCODE_SNE:
726 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
727 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
728 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
729 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
730 break;
731
732 case TGSI_OPCODE_STR:
733 dst0 = bld->bld_base.base.one;
734 break;
735
736 case TGSI_OPCODE_TEX:
737 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
738 break;
739
740 case TGSI_OPCODE_TXD:
741 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
742 break;
743
744 case TGSI_OPCODE_UP2H:
745 /* deprecated */
746 assert (0);
747 return FALSE;
748 break;
749
750 case TGSI_OPCODE_UP2US:
751 /* deprecated */
752 assert(0);
753 return FALSE;
754 break;
755
756 case TGSI_OPCODE_UP4B:
757 /* deprecated */
758 assert(0);
759 return FALSE;
760 break;
761
762 case TGSI_OPCODE_UP4UB:
763 /* deprecated */
764 assert(0);
765 return FALSE;
766 break;
767
768 case TGSI_OPCODE_X2D:
769 /* deprecated? */
770 assert(0);
771 return FALSE;
772 break;
773
774 case TGSI_OPCODE_ARA:
775 /* deprecated */
776 assert(0);
777 return FALSE;
778 break;
779
780 case TGSI_OPCODE_ARR:
781 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
782 dst0 = lp_build_round(&bld->bld_base.base, src0);
783 break;
784
785 case TGSI_OPCODE_BRA:
786 /* deprecated */
787 assert(0);
788 return FALSE;
789 break;
790
791 case TGSI_OPCODE_CAL:
792 return FALSE;
793
794 case TGSI_OPCODE_RET:
795 return FALSE;
796
797 case TGSI_OPCODE_END:
798 *pc = -1;
799 break;
800
801 case TGSI_OPCODE_SSG:
802 /* TGSI_OPCODE_SGN */
803 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
804 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
805 break;
806
807 case TGSI_OPCODE_CMP:
808 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
809 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
810 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
811 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
812 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
813 break;
814
815 case TGSI_OPCODE_SCS:
816 return FALSE;
817
818 case TGSI_OPCODE_TXB:
819 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
820 break;
821
822 case TGSI_OPCODE_NRM:
823 /* fall-through */
824 case TGSI_OPCODE_NRM4:
825 return FALSE;
826
827 case TGSI_OPCODE_DIV:
828 /* deprecated */
829 assert(0);
830 return FALSE;
831 break;
832
833 case TGSI_OPCODE_DP2:
834 return FALSE;
835
836 case TGSI_OPCODE_TXL:
837 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
838 break;
839
840 case TGSI_OPCODE_TXP:
841 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
842 break;
843
844 case TGSI_OPCODE_BRK:
845 return FALSE;
846
847 case TGSI_OPCODE_IF:
848 return FALSE;
849
850 case TGSI_OPCODE_BGNLOOP:
851 return FALSE;
852
853 case TGSI_OPCODE_BGNSUB:
854 return FALSE;
855
856 case TGSI_OPCODE_ELSE:
857 return FALSE;
858
859 case TGSI_OPCODE_ENDIF:
860 return FALSE;
861
862 case TGSI_OPCODE_ENDLOOP:
863 return FALSE;
864
865 case TGSI_OPCODE_ENDSUB:
866 return FALSE;
867
868 case TGSI_OPCODE_PUSHA:
869 /* deprecated? */
870 assert(0);
871 return FALSE;
872 break;
873
874 case TGSI_OPCODE_POPA:
875 /* deprecated? */
876 assert(0);
877 return FALSE;
878 break;
879
880 case TGSI_OPCODE_CEIL:
881 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
882 dst0 = lp_build_ceil(&bld->bld_base.base, src0);
883 break;
884
885 case TGSI_OPCODE_I2F:
886 /* deprecated? */
887 assert(0);
888 return FALSE;
889 break;
890
891 case TGSI_OPCODE_NOT:
892 /* deprecated? */
893 assert(0);
894 return FALSE;
895 break;
896
897 case TGSI_OPCODE_TRUNC:
898 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
899 dst0 = lp_build_trunc(&bld->bld_base.base, src0);
900 break;
901
902 case TGSI_OPCODE_SHL:
903 /* deprecated? */
904 assert(0);
905 return FALSE;
906 break;
907
908 case TGSI_OPCODE_ISHR:
909 /* deprecated? */
910 assert(0);
911 return FALSE;
912 break;
913
914 case TGSI_OPCODE_AND:
915 /* deprecated? */
916 assert(0);
917 return FALSE;
918 break;
919
920 case TGSI_OPCODE_OR:
921 /* deprecated? */
922 assert(0);
923 return FALSE;
924 break;
925
926 case TGSI_OPCODE_MOD:
927 /* deprecated? */
928 assert(0);
929 return FALSE;
930 break;
931
932 case TGSI_OPCODE_XOR:
933 /* deprecated? */
934 assert(0);
935 return FALSE;
936 break;
937
938 case TGSI_OPCODE_SAD:
939 /* deprecated? */
940 assert(0);
941 return FALSE;
942 break;
943
944 case TGSI_OPCODE_TXF:
945 /* deprecated? */
946 assert(0);
947 return FALSE;
948 break;
949
950 case TGSI_OPCODE_TXQ:
951 /* deprecated? */
952 assert(0);
953 return FALSE;
954 break;
955
956 case TGSI_OPCODE_CONT:
957 return FALSE;
958
959 case TGSI_OPCODE_EMIT:
960 return FALSE;
961 break;
962
963 case TGSI_OPCODE_ENDPRIM:
964 return FALSE;
965 break;
966
967 case TGSI_OPCODE_NOP:
968 break;
969
970 default:
971 return FALSE;
972 }
973
974 if (info->num_dst) {
975 lp_emit_store_aos(bld, inst, 0, dst0);
976 }
977
978 return TRUE;
979 }
980
981
982 void
lp_build_tgsi_aos(struct gallivm_state * gallivm,const struct tgsi_token * tokens,struct lp_type type,const unsigned char swizzles[4],LLVMValueRef consts_ptr,const LLVMValueRef * inputs,LLVMValueRef * outputs,struct lp_build_sampler_aos * sampler,const struct tgsi_shader_info * info)983 lp_build_tgsi_aos(struct gallivm_state *gallivm,
984 const struct tgsi_token *tokens,
985 struct lp_type type,
986 const unsigned char swizzles[4],
987 LLVMValueRef consts_ptr,
988 const LLVMValueRef *inputs,
989 LLVMValueRef *outputs,
990 struct lp_build_sampler_aos *sampler,
991 const struct tgsi_shader_info *info)
992 {
993 struct lp_build_tgsi_aos_context bld;
994 struct tgsi_parse_context parse;
995 uint num_immediates = 0;
996 unsigned chan;
997 int pc = 0;
998
999 /* Setup build context */
1000 memset(&bld, 0, sizeof bld);
1001 lp_build_context_init(&bld.bld_base.base, gallivm, type);
1002 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
1003 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1004 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1005
1006 for (chan = 0; chan < 4; ++chan) {
1007 bld.swizzles[chan] = swizzles[chan];
1008 bld.inv_swizzles[swizzles[chan]] = chan;
1009 }
1010
1011 bld.inputs = inputs;
1012 bld.outputs = outputs;
1013 bld.consts_ptr = consts_ptr;
1014 bld.sampler = sampler;
1015 bld.indirect_files = info->indirect_files;
1016 bld.bld_base.emit_swizzle = swizzle_aos;
1017 bld.bld_base.info = info;
1018
1019 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1020 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1021 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1022 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1023
1024 /* Set opcode actions */
1025 lp_set_default_actions_cpu(&bld.bld_base);
1026
1027 if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1028 return;
1029 }
1030
1031 tgsi_parse_init(&parse, tokens);
1032
1033 while (!tgsi_parse_end_of_tokens(&parse)) {
1034 tgsi_parse_token(&parse);
1035
1036 switch(parse.FullToken.Token.Type) {
1037 case TGSI_TOKEN_TYPE_DECLARATION:
1038 /* Inputs already interpolated */
1039 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1040 break;
1041
1042 case TGSI_TOKEN_TYPE_INSTRUCTION:
1043 /* save expanded instruction */
1044 lp_bld_tgsi_add_instruction(&bld.bld_base,
1045 &parse.FullToken.FullInstruction);
1046 break;
1047
1048 case TGSI_TOKEN_TYPE_IMMEDIATE:
1049 /* simply copy the immediate values into the next immediates[] slot */
1050 {
1051 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1052 float imm[4];
1053 assert(size <= 4);
1054 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1055 for (chan = 0; chan < 4; ++chan) {
1056 imm[chan] = 0.0f;
1057 }
1058 for (chan = 0; chan < size; ++chan) {
1059 unsigned swizzle = bld.swizzles[chan];
1060 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1061 }
1062 bld.immediates[num_immediates] =
1063 lp_build_const_aos(gallivm, type,
1064 imm[0], imm[1], imm[2], imm[3],
1065 NULL);
1066 num_immediates++;
1067 }
1068 break;
1069
1070 case TGSI_TOKEN_TYPE_PROPERTY:
1071 break;
1072
1073 default:
1074 assert(0);
1075 }
1076 }
1077
1078 while (pc != -1) {
1079 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1080 const struct tgsi_opcode_info *opcode_info =
1081 tgsi_get_opcode_info(instr->Instruction.Opcode);
1082 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1083 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1084 opcode_info->mnemonic);
1085 }
1086
1087 if (0) {
1088 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1089 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1090 debug_printf("11111111111111111111111111111 \n");
1091 tgsi_dump(tokens, 0);
1092 lp_debug_dump_value(function);
1093 debug_printf("2222222222222222222222222222 \n");
1094 }
1095 tgsi_parse_free(&parse);
1096 FREE(bld.bld_base.instructions);
1097
1098 if (0) {
1099 LLVMModuleRef module = LLVMGetGlobalParent(
1100 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1101 LLVMDumpModule(module);
1102 }
1103
1104 }
1105
1106