1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * TGSI to LLVM IR translation -- AoS.
31  *
32  * FIXME:
 * - No control flow support: the existing control flow code should be factored
 * out from the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36  *
37  * @author Jose Fonseca <jfonseca@vmware.com>
38  */
39 
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59 #include "lp_bld_sample.h"
60 
61 
62 /**
63  * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64  * ordering.
65  */
66 static LLVMValueRef
swizzle_aos(struct lp_build_tgsi_context * bld_base,LLVMValueRef a,unsigned swizzle_x,unsigned swizzle_y,unsigned swizzle_z,unsigned swizzle_w)67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
68             LLVMValueRef a,
69             unsigned swizzle_x,
70             unsigned swizzle_y,
71             unsigned swizzle_z,
72             unsigned swizzle_w)
73 {
74    unsigned char swizzles[4];
75    struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76 
77    assert(swizzle_x < 4);
78    assert(swizzle_y < 4);
79    assert(swizzle_z < 4);
80    assert(swizzle_w < 4);
81 
82    swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83    swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84    swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85    swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86 
87    return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88 }
89 
90 
91 static LLVMValueRef
swizzle_scalar_aos(struct lp_build_tgsi_aos_context * bld,LLVMValueRef a,unsigned chan)92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93                    LLVMValueRef a,
94                    unsigned chan)
95 {
96    chan = bld->swizzles[chan];
97    return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
98 }
99 
100 
101 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)102 emit_fetch_constant(
103    struct lp_build_tgsi_context * bld_base,
104    const struct tgsi_full_src_register * reg,
105    enum tgsi_opcode_type stype,
106    unsigned swizzle)
107 {
108    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110    struct lp_type type = bld_base->base.type;
111    LLVMValueRef res;
112    unsigned chan;
113 
114    assert(!reg->Register.Indirect);
115 
116    /*
117     * Get the constants components
118     */
119 
120    res = bld->bld_base.base.undef;
121    for (chan = 0; chan < 4; ++chan) {
122       LLVMValueRef index;
123       LLVMValueRef scalar_ptr;
124       LLVMValueRef scalar;
125       LLVMValueRef swizzle;
126 
127       index = lp_build_const_int32(bld->bld_base.base.gallivm,
128                                    reg->Register.Index * 4 + chan);
129 
130       scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131 
132       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133 
134       lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135 
136       /*
137        * NOTE: constants array is always assumed to be RGBA
138        */
139 
140       swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141                                      bld->swizzles[chan]);
142 
143       res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144    }
145 
146    /*
147     * Broadcast the first quaternion to all others.
148     *
149     * XXX: could be factored into a reusable function.
150     */
151 
152    if (type.length > 4) {
153       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154       unsigned i;
155 
156       for (chan = 0; chan < 4; ++chan) {
157          shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158       }
159 
160       for (i = 4; i < type.length; ++i) {
161          shuffles[i] = shuffles[i % 4];
162       }
163 
164       res = LLVMBuildShuffleVector(builder,
165                                    res, bld->bld_base.base.undef,
166                                    LLVMConstVector(shuffles, type.length),
167                                    "");
168    }
169    return res;
170 }
171 
172 static LLVMValueRef
emit_fetch_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)173 emit_fetch_immediate(
174    struct lp_build_tgsi_context * bld_base,
175    const struct tgsi_full_src_register * reg,
176    enum tgsi_opcode_type stype,
177    unsigned swizzle)
178 {
179    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180    LLVMValueRef res = bld->immediates[reg->Register.Index];
181    assert(res);
182    return res;
183 }
184 
185 static LLVMValueRef
emit_fetch_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)186 emit_fetch_input(
187    struct lp_build_tgsi_context * bld_base,
188    const struct tgsi_full_src_register * reg,
189    enum tgsi_opcode_type stype,
190    unsigned swizzle)
191 {
192    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193    LLVMValueRef res = bld->inputs[reg->Register.Index];
194    assert(!reg->Register.Indirect);
195    assert(res);
196    return res;
197 }
198 
199 static LLVMValueRef
emit_fetch_temporary(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)200 emit_fetch_temporary(
201    struct lp_build_tgsi_context * bld_base,
202    const struct tgsi_full_src_register * reg,
203    enum tgsi_opcode_type stype,
204    unsigned swizzle)
205 {
206    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208    LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209    LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210    assert(!reg->Register.Indirect);
211    if (!res)
212       return bld->bld_base.base.undef;
213 
214    return res;
215 }
216 
217 /**
218  * Register store.
219  */
220 void
lp_emit_store_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,unsigned index,LLVMValueRef value)221 lp_emit_store_aos(
222    struct lp_build_tgsi_aos_context *bld,
223    const struct tgsi_full_instruction *inst,
224    unsigned index,
225    LLVMValueRef value)
226 {
227    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229    LLVMValueRef mask = NULL;
230    LLVMValueRef ptr;
231 
232    /*
233     * Saturate the value
234     */
235 
236    switch (inst->Instruction.Saturate) {
237    case TGSI_SAT_NONE:
238       break;
239 
240    case TGSI_SAT_ZERO_ONE:
241       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
242       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
243       break;
244 
245    case TGSI_SAT_MINUS_PLUS_ONE:
246       value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
247       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
248       break;
249 
250    default:
251       assert(0);
252    }
253 
254    /*
255     * Translate the register file
256     */
257 
258    assert(!reg->Register.Indirect);
259 
260    switch (reg->Register.File) {
261    case TGSI_FILE_OUTPUT:
262       ptr = bld->outputs[reg->Register.Index];
263       break;
264 
265    case TGSI_FILE_TEMPORARY:
266       ptr = bld->temps[reg->Register.Index];
267       break;
268 
269    case TGSI_FILE_ADDRESS:
270       ptr = bld->addr[reg->Indirect.Index];
271       break;
272 
273    case TGSI_FILE_PREDICATE:
274       ptr = bld->preds[reg->Register.Index];
275       break;
276 
277    default:
278       assert(0);
279       return;
280    }
281 
282    if (!ptr)
283       return;
284    /*
285     * Predicate
286     */
287 
288    if (inst->Instruction.Predicate) {
289       LLVMValueRef pred;
290 
291       assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
292 
293       pred = LLVMBuildLoad(builder,
294                            bld->preds[inst->Predicate.Index], "");
295 
296       /*
297        * Convert the value to an integer mask.
298        */
299       pred = lp_build_compare(bld->bld_base.base.gallivm,
300                                bld->bld_base.base.type,
301                                PIPE_FUNC_NOTEQUAL,
302                                pred,
303                                bld->bld_base.base.zero);
304 
305       if (inst->Predicate.Negate) {
306          pred = LLVMBuildNot(builder, pred, "");
307       }
308 
309       pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
310                          inst->Predicate.SwizzleX,
311                          inst->Predicate.SwizzleY,
312                          inst->Predicate.SwizzleZ,
313                          inst->Predicate.SwizzleW);
314 
315       if (mask) {
316          mask = LLVMBuildAnd(builder, mask, pred, "");
317       } else {
318          mask = pred;
319       }
320    }
321 
322    /*
323     * Writemask
324     */
325 
326    if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
327       LLVMValueRef writemask;
328 
329       writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
330                                                    bld->bld_base.base.type,
331                                                    reg->Register.WriteMask,
332                                                    bld->swizzles);
333 
334       if (mask) {
335          mask = LLVMBuildAnd(builder, mask, writemask, "");
336       } else {
337          mask = writemask;
338       }
339    }
340 
341    if (mask) {
342       LLVMValueRef orig_value;
343 
344       orig_value = LLVMBuildLoad(builder, ptr, "");
345       value = lp_build_select(&bld->bld_base.base,
346                               mask, value, orig_value);
347    }
348 
349    LLVMBuildStore(builder, value, ptr);
350 }
351 
352 
353 /**
354  * High-level instruction translators.
355  */
356 
357 static LLVMValueRef
emit_tex(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)358 emit_tex(struct lp_build_tgsi_aos_context *bld,
359          const struct tgsi_full_instruction *inst,
360          enum lp_build_tex_modifier modifier)
361 {
362    unsigned target;
363    unsigned unit;
364    LLVMValueRef coords;
365    LLVMValueRef ddx;
366    LLVMValueRef ddy;
367    struct lp_derivatives derivs;
368 
369    if (!bld->sampler) {
370       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
371       return bld->bld_base.base.undef;
372    }
373 
374    target = inst->Texture.Texture;
375 
376    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
377 
378    if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
379       ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
380       ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
381       unit = inst->Src[3].Register.Index;
382    }  else {
383 #if 0
384       ddx = lp_build_ddx( &bld->bld_base.base, coords );
385       ddy = lp_build_ddy( &bld->bld_base.base, coords );
386 #else
387       /* TODO */
388       derivs.ddx_ddy[0] = bld->bld_base.base.one;
389       derivs.ddx_ddy[1] = bld->bld_base.base.one;
390 #endif
391       unit = inst->Src[1].Register.Index;
392    }
393 
394    return bld->sampler->emit_fetch_texel(bld->sampler,
395                                          &bld->bld_base.base,
396                                          target, unit,
397                                          coords, derivs,
398                                          modifier);
399 }
400 
401 
402 void
lp_emit_declaration_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_declaration * decl)403 lp_emit_declaration_aos(
404    struct lp_build_tgsi_aos_context *bld,
405    const struct tgsi_full_declaration *decl)
406 {
407    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
408    LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
409 
410    unsigned first = decl->Range.First;
411    unsigned last = decl->Range.Last;
412    unsigned idx;
413 
414    for (idx = first; idx <= last; ++idx) {
415       switch (decl->Declaration.File) {
416       case TGSI_FILE_TEMPORARY:
417          assert(idx < LP_MAX_TGSI_TEMPS);
418          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
419             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
420             bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
421                                                      vec_type, array_size, "");
422          } else {
423             bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
424          }
425          break;
426 
427       case TGSI_FILE_OUTPUT:
428          bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
429          break;
430 
431       case TGSI_FILE_ADDRESS:
432          assert(idx < LP_MAX_TGSI_ADDRS);
433          bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
434          break;
435 
436       case TGSI_FILE_PREDICATE:
437          assert(idx < LP_MAX_TGSI_PREDS);
438          bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
439          break;
440 
441       default:
442          /* don't need to declare other vars */
443          break;
444       }
445    }
446 }
447 
448 
449 /**
450  * Emit LLVM for one TGSI instruction.
451  * \param return TRUE for success, FALSE otherwise
452  */
453 boolean
lp_emit_instruction_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,int * pc)454 lp_emit_instruction_aos(
455    struct lp_build_tgsi_aos_context *bld,
456    const struct tgsi_full_instruction *inst,
457    const struct tgsi_opcode_info *info,
458    int *pc)
459 {
460    LLVMValueRef src0, src1, src2;
461    LLVMValueRef tmp0, tmp1;
462    LLVMValueRef dst0 = NULL;
463 
464    /*
465     * Stores and write masks are handled in a general fashion after the long
466     * instruction opcode switch statement.
467     *
468     * Although not stricitly necessary, we avoid generating instructions for
469     * channels which won't be stored, in cases where's that easy. For some
470     * complex instructions, like texture sampling, it is more convenient to
471     * assume a full writemask and then let LLVM optimization passes eliminate
472     * redundant code.
473     */
474 
475    (*pc)++;
476 
477    assert(info->num_dst <= 1);
478    if (info->num_dst) {
479       dst0 = bld->bld_base.base.undef;
480    }
481 
482    switch (inst->Instruction.Opcode) {
483    case TGSI_OPCODE_ARL:
484       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
485       dst0 = lp_build_floor(&bld->bld_base.base, src0);
486       break;
487 
488    case TGSI_OPCODE_MOV:
489       dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
490       break;
491 
492    case TGSI_OPCODE_LIT:
493       return FALSE;
494 
495    case TGSI_OPCODE_RCP:
496    /* TGSI_OPCODE_RECIP */
497       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
498       dst0 = lp_build_rcp(&bld->bld_base.base, src0);
499       break;
500 
501    case TGSI_OPCODE_RSQ:
502    /* TGSI_OPCODE_RECIPSQRT */
503       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
504       tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
505       dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
506       break;
507 
508    case TGSI_OPCODE_EXP:
509       return FALSE;
510 
511    case TGSI_OPCODE_LOG:
512       return FALSE;
513 
514    case TGSI_OPCODE_MUL:
515       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
516       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
517       dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
518       break;
519 
520    case TGSI_OPCODE_ADD:
521       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
522       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
523       dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
524       break;
525 
526    case TGSI_OPCODE_DP3:
527    /* TGSI_OPCODE_DOT3 */
528       return FALSE;
529 
530    case TGSI_OPCODE_DP4:
531    /* TGSI_OPCODE_DOT4 */
532       return FALSE;
533 
534    case TGSI_OPCODE_DST:
535       return FALSE;
536 
537    case TGSI_OPCODE_MIN:
538       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
539       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
540       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
541       break;
542 
543    case TGSI_OPCODE_MAX:
544       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
545       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
546       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
547       break;
548 
549    case TGSI_OPCODE_SLT:
550    /* TGSI_OPCODE_SETLT */
551       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
552       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
553       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
554       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
555       break;
556 
557    case TGSI_OPCODE_SGE:
558    /* TGSI_OPCODE_SETGE */
559       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
560       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
561       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
562       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
563       break;
564 
565    case TGSI_OPCODE_MAD:
566    /* TGSI_OPCODE_MADD */
567       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
568       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
569       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
570       tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
571       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
572       break;
573 
574    case TGSI_OPCODE_SUB:
575       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
576       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
577       dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
578       break;
579 
580    case TGSI_OPCODE_LRP:
581       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
582       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
583       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
584       tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
585       tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
586       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
587       break;
588 
589    case TGSI_OPCODE_CND:
590       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
591       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
592       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
593       tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
594       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
595       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
596       break;
597 
598    case TGSI_OPCODE_DP2A:
599       return FALSE;
600 
601    case TGSI_OPCODE_FRC:
602       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
603       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
604       dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
605       break;
606 
607    case TGSI_OPCODE_CLAMP:
608       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
609       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
610       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
611       tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
612       dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
613       break;
614 
615    case TGSI_OPCODE_FLR:
616       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
617       dst0 = lp_build_floor(&bld->bld_base.base, src0);
618       break;
619 
620    case TGSI_OPCODE_ROUND:
621       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
622       dst0 = lp_build_round(&bld->bld_base.base, src0);
623       break;
624 
625    case TGSI_OPCODE_EX2:
626       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
627       tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
628       dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
629       break;
630 
631    case TGSI_OPCODE_LG2:
632       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
633       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
634       dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
635       break;
636 
637    case TGSI_OPCODE_POW:
638       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
639       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
640       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
641       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
642       dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
643       break;
644 
645    case TGSI_OPCODE_XPD:
646       return FALSE;
647 
648    case TGSI_OPCODE_RCC:
649       /* deprecated? */
650       assert(0);
651       return FALSE;
652 
653    case TGSI_OPCODE_DPH:
654       return FALSE;
655 
656    case TGSI_OPCODE_COS:
657       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
658       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
659       dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
660       break;
661 
662    case TGSI_OPCODE_DDX:
663       return FALSE;
664 
665    case TGSI_OPCODE_DDY:
666       return FALSE;
667 
668    case TGSI_OPCODE_KILP:
669       /* predicated kill */
670       return FALSE;
671 
672    case TGSI_OPCODE_KIL:
673       /* conditional kill */
674       return FALSE;
675 
676    case TGSI_OPCODE_PK2H:
677       return FALSE;
678       break;
679 
680    case TGSI_OPCODE_PK2US:
681       return FALSE;
682       break;
683 
684    case TGSI_OPCODE_PK4B:
685       return FALSE;
686       break;
687 
688    case TGSI_OPCODE_PK4UB:
689       return FALSE;
690 
691    case TGSI_OPCODE_RFL:
692       return FALSE;
693 
694    case TGSI_OPCODE_SEQ:
695       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
696       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
697       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
698       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
699       break;
700 
701    case TGSI_OPCODE_SFL:
702       dst0 = bld->bld_base.base.zero;
703       break;
704 
705    case TGSI_OPCODE_SGT:
706       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
707       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
708       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
709       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
710       break;
711 
712    case TGSI_OPCODE_SIN:
713       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
714       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
715       dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
716       break;
717 
718    case TGSI_OPCODE_SLE:
719       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
720       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
721       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
722       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
723       break;
724 
725    case TGSI_OPCODE_SNE:
726       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
727       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
728       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
729       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
730       break;
731 
732    case TGSI_OPCODE_STR:
733       dst0 = bld->bld_base.base.one;
734       break;
735 
736    case TGSI_OPCODE_TEX:
737       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
738       break;
739 
740    case TGSI_OPCODE_TXD:
741       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
742       break;
743 
744    case TGSI_OPCODE_UP2H:
745       /* deprecated */
746       assert (0);
747       return FALSE;
748       break;
749 
750    case TGSI_OPCODE_UP2US:
751       /* deprecated */
752       assert(0);
753       return FALSE;
754       break;
755 
756    case TGSI_OPCODE_UP4B:
757       /* deprecated */
758       assert(0);
759       return FALSE;
760       break;
761 
762    case TGSI_OPCODE_UP4UB:
763       /* deprecated */
764       assert(0);
765       return FALSE;
766       break;
767 
768    case TGSI_OPCODE_X2D:
769       /* deprecated? */
770       assert(0);
771       return FALSE;
772       break;
773 
774    case TGSI_OPCODE_ARA:
775       /* deprecated */
776       assert(0);
777       return FALSE;
778       break;
779 
780    case TGSI_OPCODE_ARR:
781       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
782       dst0 = lp_build_round(&bld->bld_base.base, src0);
783       break;
784 
785    case TGSI_OPCODE_BRA:
786       /* deprecated */
787       assert(0);
788       return FALSE;
789       break;
790 
791    case TGSI_OPCODE_CAL:
792       return FALSE;
793 
794    case TGSI_OPCODE_RET:
795       return FALSE;
796 
797    case TGSI_OPCODE_END:
798       *pc = -1;
799       break;
800 
801    case TGSI_OPCODE_SSG:
802    /* TGSI_OPCODE_SGN */
803       tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
804       dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
805       break;
806 
807    case TGSI_OPCODE_CMP:
808       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
809       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
810       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
811       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
812       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
813       break;
814 
815    case TGSI_OPCODE_SCS:
816       return FALSE;
817 
818    case TGSI_OPCODE_TXB:
819       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
820       break;
821 
822    case TGSI_OPCODE_NRM:
823       /* fall-through */
824    case TGSI_OPCODE_NRM4:
825       return FALSE;
826 
827    case TGSI_OPCODE_DIV:
828       /* deprecated */
829       assert(0);
830       return FALSE;
831       break;
832 
833    case TGSI_OPCODE_DP2:
834       return FALSE;
835 
836    case TGSI_OPCODE_TXL:
837       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
838       break;
839 
840    case TGSI_OPCODE_TXP:
841       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
842       break;
843 
844    case TGSI_OPCODE_BRK:
845       return FALSE;
846 
847    case TGSI_OPCODE_IF:
848       return FALSE;
849 
850    case TGSI_OPCODE_BGNLOOP:
851       return FALSE;
852 
853    case TGSI_OPCODE_BGNSUB:
854       return FALSE;
855 
856    case TGSI_OPCODE_ELSE:
857       return FALSE;
858 
859    case TGSI_OPCODE_ENDIF:
860       return FALSE;
861 
862    case TGSI_OPCODE_ENDLOOP:
863       return FALSE;
864 
865    case TGSI_OPCODE_ENDSUB:
866       return FALSE;
867 
868    case TGSI_OPCODE_PUSHA:
869       /* deprecated? */
870       assert(0);
871       return FALSE;
872       break;
873 
874    case TGSI_OPCODE_POPA:
875       /* deprecated? */
876       assert(0);
877       return FALSE;
878       break;
879 
880    case TGSI_OPCODE_CEIL:
881       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
882       dst0 = lp_build_ceil(&bld->bld_base.base, src0);
883       break;
884 
885    case TGSI_OPCODE_I2F:
886       /* deprecated? */
887       assert(0);
888       return FALSE;
889       break;
890 
891    case TGSI_OPCODE_NOT:
892       /* deprecated? */
893       assert(0);
894       return FALSE;
895       break;
896 
897    case TGSI_OPCODE_TRUNC:
898       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
899       dst0 = lp_build_trunc(&bld->bld_base.base, src0);
900       break;
901 
902    case TGSI_OPCODE_SHL:
903       /* deprecated? */
904       assert(0);
905       return FALSE;
906       break;
907 
908    case TGSI_OPCODE_ISHR:
909       /* deprecated? */
910       assert(0);
911       return FALSE;
912       break;
913 
914    case TGSI_OPCODE_AND:
915       /* deprecated? */
916       assert(0);
917       return FALSE;
918       break;
919 
920    case TGSI_OPCODE_OR:
921       /* deprecated? */
922       assert(0);
923       return FALSE;
924       break;
925 
926    case TGSI_OPCODE_MOD:
927       /* deprecated? */
928       assert(0);
929       return FALSE;
930       break;
931 
932    case TGSI_OPCODE_XOR:
933       /* deprecated? */
934       assert(0);
935       return FALSE;
936       break;
937 
938    case TGSI_OPCODE_SAD:
939       /* deprecated? */
940       assert(0);
941       return FALSE;
942       break;
943 
944    case TGSI_OPCODE_TXF:
945       /* deprecated? */
946       assert(0);
947       return FALSE;
948       break;
949 
950    case TGSI_OPCODE_TXQ:
951       /* deprecated? */
952       assert(0);
953       return FALSE;
954       break;
955 
956    case TGSI_OPCODE_CONT:
957       return FALSE;
958 
959    case TGSI_OPCODE_EMIT:
960       return FALSE;
961       break;
962 
963    case TGSI_OPCODE_ENDPRIM:
964       return FALSE;
965       break;
966 
967    case TGSI_OPCODE_NOP:
968       break;
969 
970    default:
971       return FALSE;
972    }
973 
974    if (info->num_dst) {
975       lp_emit_store_aos(bld, inst, 0, dst0);
976    }
977 
978    return TRUE;
979 }
980 
981 
982 void
lp_build_tgsi_aos(struct gallivm_state * gallivm,const struct tgsi_token * tokens,struct lp_type type,const unsigned char swizzles[4],LLVMValueRef consts_ptr,const LLVMValueRef * inputs,LLVMValueRef * outputs,struct lp_build_sampler_aos * sampler,const struct tgsi_shader_info * info)983 lp_build_tgsi_aos(struct gallivm_state *gallivm,
984                   const struct tgsi_token *tokens,
985                   struct lp_type type,
986                   const unsigned char swizzles[4],
987                   LLVMValueRef consts_ptr,
988                   const LLVMValueRef *inputs,
989                   LLVMValueRef *outputs,
990                   struct lp_build_sampler_aos *sampler,
991                   const struct tgsi_shader_info *info)
992 {
993    struct lp_build_tgsi_aos_context bld;
994    struct tgsi_parse_context parse;
995    uint num_immediates = 0;
996    unsigned chan;
997    int pc = 0;
998 
999    /* Setup build context */
1000    memset(&bld, 0, sizeof bld);
1001    lp_build_context_init(&bld.bld_base.base, gallivm, type);
1002    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
1003    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1004    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1005 
1006    for (chan = 0; chan < 4; ++chan) {
1007       bld.swizzles[chan] = swizzles[chan];
1008       bld.inv_swizzles[swizzles[chan]] = chan;
1009    }
1010 
1011    bld.inputs = inputs;
1012    bld.outputs = outputs;
1013    bld.consts_ptr = consts_ptr;
1014    bld.sampler = sampler;
1015    bld.indirect_files = info->indirect_files;
1016    bld.bld_base.emit_swizzle = swizzle_aos;
1017    bld.bld_base.info = info;
1018 
1019    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1020    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1021    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1022    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1023 
1024    /* Set opcode actions */
1025    lp_set_default_actions_cpu(&bld.bld_base);
1026 
1027    if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1028       return;
1029    }
1030 
1031    tgsi_parse_init(&parse, tokens);
1032 
1033    while (!tgsi_parse_end_of_tokens(&parse)) {
1034       tgsi_parse_token(&parse);
1035 
1036       switch(parse.FullToken.Token.Type) {
1037       case TGSI_TOKEN_TYPE_DECLARATION:
1038          /* Inputs already interpolated */
1039          lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1040          break;
1041 
1042       case TGSI_TOKEN_TYPE_INSTRUCTION:
1043          /* save expanded instruction */
1044          lp_bld_tgsi_add_instruction(&bld.bld_base,
1045                                      &parse.FullToken.FullInstruction);
1046          break;
1047 
1048       case TGSI_TOKEN_TYPE_IMMEDIATE:
1049          /* simply copy the immediate values into the next immediates[] slot */
1050          {
1051             const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1052             float imm[4];
1053             assert(size <= 4);
1054             assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1055             for (chan = 0; chan < 4; ++chan) {
1056                imm[chan] = 0.0f;
1057             }
1058             for (chan = 0; chan < size; ++chan) {
1059                unsigned swizzle = bld.swizzles[chan];
1060                imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1061             }
1062             bld.immediates[num_immediates] =
1063                      lp_build_const_aos(gallivm, type,
1064                                         imm[0], imm[1], imm[2], imm[3],
1065                                         NULL);
1066             num_immediates++;
1067          }
1068          break;
1069 
1070       case TGSI_TOKEN_TYPE_PROPERTY:
1071          break;
1072 
1073       default:
1074          assert(0);
1075       }
1076    }
1077 
1078    while (pc != -1) {
1079       struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1080       const struct tgsi_opcode_info *opcode_info =
1081          tgsi_get_opcode_info(instr->Instruction.Opcode);
1082       if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1083          _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1084                        opcode_info->mnemonic);
1085    }
1086 
1087    if (0) {
1088       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1089       LLVMValueRef function = LLVMGetBasicBlockParent(block);
1090       debug_printf("11111111111111111111111111111 \n");
1091       tgsi_dump(tokens, 0);
1092       lp_debug_dump_value(function);
1093       debug_printf("2222222222222222222222222222 \n");
1094    }
1095    tgsi_parse_free(&parse);
1096    FREE(bld.bld_base.instructions);
1097 
1098    if (0) {
1099       LLVMModuleRef module = LLVMGetGlobalParent(
1100          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1101       LLVMDumpModule(module);
1102    }
1103 
1104 }
1105 
1106