1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
/**
 * @file
 * TGSI to LLVM IR translation -- AoS.
 *
 * FIXME:
 * - No control flow support: the existing control flow code should be factored
 *   out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the
 *   samplers.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
39 
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59 #include "lp_bld_sample.h"
60 
61 
62 /**
63  * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64  * ordering.
65  */
66 static LLVMValueRef
swizzle_aos(struct lp_build_tgsi_context * bld_base,LLVMValueRef a,unsigned swizzle_x,unsigned swizzle_y,unsigned swizzle_z,unsigned swizzle_w)67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
68             LLVMValueRef a,
69             unsigned swizzle_x,
70             unsigned swizzle_y,
71             unsigned swizzle_z,
72             unsigned swizzle_w)
73 {
74    unsigned char swizzles[4];
75    struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76 
77    assert(swizzle_x < 4);
78    assert(swizzle_y < 4);
79    assert(swizzle_z < 4);
80    assert(swizzle_w < 4);
81 
82    swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83    swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84    swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85    swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86 
87    return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88 }
89 
90 
91 static LLVMValueRef
swizzle_scalar_aos(struct lp_build_tgsi_aos_context * bld,LLVMValueRef a,unsigned chan)92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93                    LLVMValueRef a,
94                    unsigned chan)
95 {
96    chan = bld->swizzles[chan];
97    return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98 }
99 
100 
101 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)102 emit_fetch_constant(
103    struct lp_build_tgsi_context * bld_base,
104    const struct tgsi_full_src_register * reg,
105    enum tgsi_opcode_type stype,
106    unsigned swizzle)
107 {
108    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110    struct lp_type type = bld_base->base.type;
111    LLVMValueRef res;
112    unsigned chan;
113 
114    assert(!reg->Register.Indirect);
115 
116    /*
117     * Get the constants components
118     */
119 
120    res = bld->bld_base.base.undef;
121    for (chan = 0; chan < 4; ++chan) {
122       LLVMValueRef index;
123       LLVMValueRef scalar_ptr;
124       LLVMValueRef scalar;
125       LLVMValueRef swizzle;
126 
127       index = lp_build_const_int32(bld->bld_base.base.gallivm,
128                                    reg->Register.Index * 4 + chan);
129 
130       scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131 
132       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133 
134       lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135 
136       /*
137        * NOTE: constants array is always assumed to be RGBA
138        */
139 
140       swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141                                      bld->swizzles[chan]);
142 
143       res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144    }
145 
146    /*
147     * Broadcast the first quaternion to all others.
148     *
149     * XXX: could be factored into a reusable function.
150     */
151 
152    if (type.length > 4) {
153       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154       unsigned i;
155 
156       for (chan = 0; chan < 4; ++chan) {
157          shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158       }
159 
160       for (i = 4; i < type.length; ++i) {
161          shuffles[i] = shuffles[i % 4];
162       }
163 
164       res = LLVMBuildShuffleVector(builder,
165                                    res, bld->bld_base.base.undef,
166                                    LLVMConstVector(shuffles, type.length),
167                                    "");
168    }
169    return res;
170 }
171 
172 static LLVMValueRef
emit_fetch_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)173 emit_fetch_immediate(
174    struct lp_build_tgsi_context * bld_base,
175    const struct tgsi_full_src_register * reg,
176    enum tgsi_opcode_type stype,
177    unsigned swizzle)
178 {
179    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180    LLVMValueRef res = bld->immediates[reg->Register.Index];
181    assert(res);
182    return res;
183 }
184 
185 static LLVMValueRef
emit_fetch_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)186 emit_fetch_input(
187    struct lp_build_tgsi_context * bld_base,
188    const struct tgsi_full_src_register * reg,
189    enum tgsi_opcode_type stype,
190    unsigned swizzle)
191 {
192    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193    LLVMValueRef res = bld->inputs[reg->Register.Index];
194    assert(!reg->Register.Indirect);
195    assert(res);
196    return res;
197 }
198 
199 static LLVMValueRef
emit_fetch_temporary(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)200 emit_fetch_temporary(
201    struct lp_build_tgsi_context * bld_base,
202    const struct tgsi_full_src_register * reg,
203    enum tgsi_opcode_type stype,
204    unsigned swizzle)
205 {
206    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208    LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209    LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210    assert(!reg->Register.Indirect);
211    if (!res)
212       return bld->bld_base.base.undef;
213 
214    return res;
215 }
216 
217 /**
218  * Register store.
219  */
220 void
lp_emit_store_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,unsigned index,LLVMValueRef value)221 lp_emit_store_aos(
222    struct lp_build_tgsi_aos_context *bld,
223    const struct tgsi_full_instruction *inst,
224    unsigned index,
225    LLVMValueRef value)
226 {
227    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229    LLVMValueRef mask = NULL;
230    LLVMValueRef ptr;
231 
232    /*
233     * Saturate the value
234     */
235    if (inst->Instruction.Saturate) {
236       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
237       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
238    }
239 
240    /*
241     * Translate the register file
242     */
243 
244    assert(!reg->Register.Indirect);
245 
246    switch (reg->Register.File) {
247    case TGSI_FILE_OUTPUT:
248       ptr = bld->outputs[reg->Register.Index];
249       break;
250 
251    case TGSI_FILE_TEMPORARY:
252       ptr = bld->temps[reg->Register.Index];
253       break;
254 
255    case TGSI_FILE_ADDRESS:
256       ptr = bld->addr[reg->Indirect.Index];
257       break;
258 
259    default:
260       assert(0);
261       return;
262    }
263 
264    if (!ptr)
265       return;
266 
267    /*
268     * Writemask
269     */
270 
271    if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
272       LLVMValueRef writemask;
273 
274       writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
275                                                    bld->bld_base.base.type,
276                                                    reg->Register.WriteMask,
277                                                    TGSI_NUM_CHANNELS,
278                                                    bld->swizzles);
279 
280       if (mask) {
281          mask = LLVMBuildAnd(builder, mask, writemask, "");
282       } else {
283          mask = writemask;
284       }
285    }
286 
287    if (mask) {
288       LLVMValueRef orig_value;
289 
290       orig_value = LLVMBuildLoad(builder, ptr, "");
291       value = lp_build_select(&bld->bld_base.base,
292                               mask, value, orig_value);
293    }
294 
295    LLVMBuildStore(builder, value, ptr);
296 }
297 
298 
299 /**
300  * High-level instruction translators.
301  */
302 
303 static LLVMValueRef
emit_tex(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)304 emit_tex(struct lp_build_tgsi_aos_context *bld,
305          const struct tgsi_full_instruction *inst,
306          enum lp_build_tex_modifier modifier)
307 {
308    unsigned target;
309    unsigned unit;
310    LLVMValueRef coords;
311    struct lp_derivatives derivs = { {NULL}, {NULL} };
312 
313    if (!bld->sampler) {
314       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
315       return bld->bld_base.base.undef;
316    }
317 
318    target = inst->Texture.Texture;
319 
320    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
321 
322    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
323       /* probably not going to work */
324       derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
325       derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
326       unit = inst->Src[3].Register.Index;
327    }
328    else {
329       unit = inst->Src[1].Register.Index;
330    }
331    return bld->sampler->emit_fetch_texel(bld->sampler,
332                                          &bld->bld_base.base,
333                                          target, unit,
334                                          coords, derivs,
335                                          modifier);
336 }
337 
338 
339 static LLVMValueRef
emit_sample(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)340 emit_sample(struct lp_build_tgsi_aos_context *bld,
341             const struct tgsi_full_instruction *inst,
342             enum lp_build_tex_modifier modifier)
343 {
344    unsigned target;
345    unsigned unit;
346    LLVMValueRef coords;
347    struct lp_derivatives derivs = { {NULL}, {NULL} };
348 
349    if (!bld->sampler) {
350       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
351       return bld->bld_base.base.undef;
352    }
353 
354    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
355 
356    /* ignore modifiers, can't handle different sampler / sampler view, etc... */
357    unit = inst->Src[1].Register.Index;
358    assert(inst->Src[2].Register.Index == unit);
359 
360    target = bld->sv[unit].Resource;
361 
362    return bld->sampler->emit_fetch_texel(bld->sampler,
363                                          &bld->bld_base.base,
364                                          target, unit,
365                                          coords, derivs,
366                                          modifier);
367 }
368 
369 
370 void
lp_emit_declaration_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_declaration * decl)371 lp_emit_declaration_aos(
372    struct lp_build_tgsi_aos_context *bld,
373    const struct tgsi_full_declaration *decl)
374 {
375    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
376    LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
377 
378    unsigned first = decl->Range.First;
379    unsigned last = decl->Range.Last;
380    unsigned idx;
381 
382    for (idx = first; idx <= last; ++idx) {
383       switch (decl->Declaration.File) {
384       case TGSI_FILE_TEMPORARY:
385          assert(idx < LP_MAX_INLINED_TEMPS);
386          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
387             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
388             bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
389                                                      vec_type, array_size, "");
390          } else {
391             bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
392          }
393          break;
394 
395       case TGSI_FILE_OUTPUT:
396          bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
397          break;
398 
399       case TGSI_FILE_ADDRESS:
400          assert(idx < LP_MAX_TGSI_ADDRS);
401          bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
402          break;
403 
404       case TGSI_FILE_SAMPLER_VIEW:
405          /*
406           * The target stored here MUST match whatever there actually
407           * is in the set sampler views (what about return type?).
408           */
409          assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
410          for (idx = first; idx <= last; ++idx) {
411             bld->sv[idx] = decl->SamplerView;
412          }
413          break;
414 
415       default:
416          /* don't need to declare other vars */
417          break;
418       }
419    }
420 }
421 
422 
423 /**
424  * Emit LLVM for one TGSI instruction.
425  * \param return TRUE for success, FALSE otherwise
426  */
427 boolean
lp_emit_instruction_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,int * pc)428 lp_emit_instruction_aos(
429    struct lp_build_tgsi_aos_context *bld,
430    const struct tgsi_full_instruction *inst,
431    const struct tgsi_opcode_info *info,
432    int *pc)
433 {
434    LLVMValueRef src0, src1, src2;
435    LLVMValueRef tmp0;
436    LLVMValueRef dst0 = NULL;
437 
438    /*
439     * Stores and write masks are handled in a general fashion after the long
440     * instruction opcode switch statement.
441     *
442     * Although not stricitly necessary, we avoid generating instructions for
443     * channels which won't be stored, in cases where's that easy. For some
444     * complex instructions, like texture sampling, it is more convenient to
445     * assume a full writemask and then let LLVM optimization passes eliminate
446     * redundant code.
447     */
448 
449    (*pc)++;
450 
451    assert(info->num_dst <= 1);
452    if (info->num_dst) {
453       dst0 = bld->bld_base.base.undef;
454    }
455 
456    switch (inst->Instruction.Opcode) {
457    case TGSI_OPCODE_ARL:
458       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
459       dst0 = lp_build_floor(&bld->bld_base.base, src0);
460       break;
461 
462    case TGSI_OPCODE_MOV:
463       dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
464       break;
465 
466    case TGSI_OPCODE_LIT:
467       return FALSE;
468 
469    case TGSI_OPCODE_RCP:
470    /* TGSI_OPCODE_RECIP */
471       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
472       dst0 = lp_build_rcp(&bld->bld_base.base, src0);
473       break;
474 
475    case TGSI_OPCODE_RSQ:
476    /* TGSI_OPCODE_RECIPSQRT */
477       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
478       tmp0 = lp_build_abs(&bld->bld_base.base, src0);
479       dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
480       break;
481 
482    case TGSI_OPCODE_EXP:
483       return FALSE;
484 
485    case TGSI_OPCODE_LOG:
486       return FALSE;
487 
488    case TGSI_OPCODE_MUL:
489       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
490       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
491       dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
492       break;
493 
494    case TGSI_OPCODE_ADD:
495       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
496       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
497       dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
498       break;
499 
500    case TGSI_OPCODE_DP3:
501    /* TGSI_OPCODE_DOT3 */
502       return FALSE;
503 
504    case TGSI_OPCODE_DP4:
505    /* TGSI_OPCODE_DOT4 */
506       return FALSE;
507 
508    case TGSI_OPCODE_DST:
509       return FALSE;
510 
511    case TGSI_OPCODE_MIN:
512       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
513       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
514       dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
515       break;
516 
517    case TGSI_OPCODE_MAX:
518       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
519       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
520       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
521       break;
522 
523    case TGSI_OPCODE_SLT:
524    /* TGSI_OPCODE_SETLT */
525       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
526       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
527       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
528       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
529       break;
530 
531    case TGSI_OPCODE_SGE:
532    /* TGSI_OPCODE_SETGE */
533       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
534       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
535       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
536       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
537       break;
538 
539    case TGSI_OPCODE_MAD:
540    /* TGSI_OPCODE_MADD */
541       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
542       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
543       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
544       tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
545       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
546       break;
547 
548    case TGSI_OPCODE_LRP:
549       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
550       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
551       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
552       tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
553       tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
554       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
555       break;
556 
557    case TGSI_OPCODE_FRC:
558       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
559       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
560       dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
561       break;
562 
563    case TGSI_OPCODE_FLR:
564       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
565       dst0 = lp_build_floor(&bld->bld_base.base, src0);
566       break;
567 
568    case TGSI_OPCODE_ROUND:
569       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
570       dst0 = lp_build_round(&bld->bld_base.base, src0);
571       break;
572 
573    case TGSI_OPCODE_EX2:
574       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
575       tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
576       dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
577       break;
578 
579    case TGSI_OPCODE_LG2:
580       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
581       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
582       dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
583       break;
584 
585    case TGSI_OPCODE_POW:
586       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
587       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
588       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
589       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
590       dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
591       break;
592 
593    case TGSI_OPCODE_COS:
594       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
595       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
596       dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
597       break;
598 
599    case TGSI_OPCODE_DDX:
600       return FALSE;
601 
602    case TGSI_OPCODE_DDY:
603       return FALSE;
604 
605    case TGSI_OPCODE_KILL:
606       return FALSE;
607 
608    case TGSI_OPCODE_KILL_IF:
609       return FALSE;
610 
611    case TGSI_OPCODE_PK2H:
612       return FALSE;
613       break;
614 
615    case TGSI_OPCODE_PK2US:
616       return FALSE;
617       break;
618 
619    case TGSI_OPCODE_PK4B:
620       return FALSE;
621       break;
622 
623    case TGSI_OPCODE_PK4UB:
624       return FALSE;
625 
626    case TGSI_OPCODE_SEQ:
627       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
628       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
629       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
630       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
631       break;
632 
633    case TGSI_OPCODE_SGT:
634       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
635       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
636       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
637       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
638       break;
639 
640    case TGSI_OPCODE_SIN:
641       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
642       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
643       dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
644       break;
645 
646    case TGSI_OPCODE_SLE:
647       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
648       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
649       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
650       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
651       break;
652 
653    case TGSI_OPCODE_SNE:
654       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
655       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
656       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
657       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
658       break;
659 
660    case TGSI_OPCODE_TEX:
661       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
662       break;
663 
664    case TGSI_OPCODE_TXD:
665       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
666       break;
667 
668    case TGSI_OPCODE_UP2H:
669       /* deprecated */
670       assert (0);
671       return FALSE;
672       break;
673 
674    case TGSI_OPCODE_UP2US:
675       /* deprecated */
676       assert(0);
677       return FALSE;
678       break;
679 
680    case TGSI_OPCODE_UP4B:
681       /* deprecated */
682       assert(0);
683       return FALSE;
684       break;
685 
686    case TGSI_OPCODE_UP4UB:
687       /* deprecated */
688       assert(0);
689       return FALSE;
690       break;
691 
692    case TGSI_OPCODE_ARR:
693       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
694       dst0 = lp_build_round(&bld->bld_base.base, src0);
695       break;
696 
697    case TGSI_OPCODE_CAL:
698       return FALSE;
699 
700    case TGSI_OPCODE_RET:
701       /* safe to ignore at end */
702       break;
703 
704    case TGSI_OPCODE_END:
705       *pc = -1;
706       break;
707 
708    case TGSI_OPCODE_SSG:
709    /* TGSI_OPCODE_SGN */
710       tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
711       dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
712       break;
713 
714    case TGSI_OPCODE_CMP:
715       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
716       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
717       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
718       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
719       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
720       break;
721 
722    case TGSI_OPCODE_TXB:
723       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
724       break;
725 
726    case TGSI_OPCODE_DIV:
727       assert(0);
728       return FALSE;
729       break;
730 
731    case TGSI_OPCODE_DP2:
732       return FALSE;
733 
734    case TGSI_OPCODE_TXL:
735       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
736       break;
737 
738    case TGSI_OPCODE_TXP:
739       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
740       break;
741 
742    case TGSI_OPCODE_BRK:
743       return FALSE;
744 
745    case TGSI_OPCODE_IF:
746    case TGSI_OPCODE_UIF:
747       return FALSE;
748 
749    case TGSI_OPCODE_BGNLOOP:
750       return FALSE;
751 
752    case TGSI_OPCODE_BGNSUB:
753       return FALSE;
754 
755    case TGSI_OPCODE_ELSE:
756       return FALSE;
757 
758    case TGSI_OPCODE_ENDIF:
759       return FALSE;
760 
761    case TGSI_OPCODE_ENDLOOP:
762       return FALSE;
763 
764    case TGSI_OPCODE_ENDSUB:
765       return FALSE;
766 
767    case TGSI_OPCODE_CEIL:
768       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
769       dst0 = lp_build_ceil(&bld->bld_base.base, src0);
770       break;
771 
772    case TGSI_OPCODE_I2F:
773       assert(0);
774       return FALSE;
775       break;
776 
777    case TGSI_OPCODE_NOT:
778       assert(0);
779       return FALSE;
780       break;
781 
782    case TGSI_OPCODE_TRUNC:
783       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
784       dst0 = lp_build_trunc(&bld->bld_base.base, src0);
785       break;
786 
787    case TGSI_OPCODE_SHL:
788       assert(0);
789       return FALSE;
790       break;
791 
792    case TGSI_OPCODE_ISHR:
793       assert(0);
794       return FALSE;
795       break;
796 
797    case TGSI_OPCODE_AND:
798       assert(0);
799       return FALSE;
800       break;
801 
802    case TGSI_OPCODE_OR:
803       assert(0);
804       return FALSE;
805       break;
806 
807    case TGSI_OPCODE_MOD:
808       assert(0);
809       return FALSE;
810       break;
811 
812    case TGSI_OPCODE_XOR:
813       assert(0);
814       return FALSE;
815       break;
816 
817    case TGSI_OPCODE_TXF:
818       assert(0);
819       return FALSE;
820       break;
821 
822    case TGSI_OPCODE_TXQ:
823       assert(0);
824       return FALSE;
825       break;
826 
827    case TGSI_OPCODE_CONT:
828       return FALSE;
829 
830    case TGSI_OPCODE_EMIT:
831       return FALSE;
832       break;
833 
834    case TGSI_OPCODE_ENDPRIM:
835       return FALSE;
836       break;
837 
838    case TGSI_OPCODE_NOP:
839       break;
840 
841    case TGSI_OPCODE_SAMPLE:
842       dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
843       break;
844 
845    default:
846       return FALSE;
847    }
848 
849    if (info->num_dst) {
850       lp_emit_store_aos(bld, inst, 0, dst0);
851    }
852 
853    return TRUE;
854 }
855 
856 
857 void
lp_build_tgsi_aos(struct gallivm_state * gallivm,const struct tgsi_token * tokens,struct lp_type type,const unsigned char swizzles[4],LLVMValueRef consts_ptr,const LLVMValueRef * inputs,LLVMValueRef * outputs,const struct lp_build_sampler_aos * sampler,const struct tgsi_shader_info * info)858 lp_build_tgsi_aos(struct gallivm_state *gallivm,
859                   const struct tgsi_token *tokens,
860                   struct lp_type type,
861                   const unsigned char swizzles[4],
862                   LLVMValueRef consts_ptr,
863                   const LLVMValueRef *inputs,
864                   LLVMValueRef *outputs,
865                   const struct lp_build_sampler_aos *sampler,
866                   const struct tgsi_shader_info *info)
867 {
868    struct lp_build_tgsi_aos_context bld;
869    struct tgsi_parse_context parse;
870    uint num_immediates = 0;
871    unsigned chan;
872    int pc = 0;
873 
874    /* Setup build context */
875    memset(&bld, 0, sizeof bld);
876    lp_build_context_init(&bld.bld_base.base, gallivm, type);
877    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
878    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
879    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
880 
881    for (chan = 0; chan < 4; ++chan) {
882       bld.swizzles[chan] = swizzles[chan];
883       bld.inv_swizzles[swizzles[chan]] = chan;
884    }
885 
886    bld.inputs = inputs;
887    bld.outputs = outputs;
888    bld.consts_ptr = consts_ptr;
889    bld.sampler = sampler;
890    bld.indirect_files = info->indirect_files;
891    bld.bld_base.emit_swizzle = swizzle_aos;
892    bld.bld_base.info = info;
893 
894    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
895    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
896    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
897    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
898 
899    /* Set opcode actions */
900    lp_set_default_actions_cpu(&bld.bld_base);
901 
902    if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
903       return;
904    }
905 
906    tgsi_parse_init(&parse, tokens);
907 
908    while (!tgsi_parse_end_of_tokens(&parse)) {
909       tgsi_parse_token(&parse);
910 
911       switch(parse.FullToken.Token.Type) {
912       case TGSI_TOKEN_TYPE_DECLARATION:
913          /* Inputs already interpolated */
914          lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
915          break;
916 
917       case TGSI_TOKEN_TYPE_INSTRUCTION:
918          /* save expanded instruction */
919          lp_bld_tgsi_add_instruction(&bld.bld_base,
920                                      &parse.FullToken.FullInstruction);
921          break;
922 
923       case TGSI_TOKEN_TYPE_IMMEDIATE:
924          /* simply copy the immediate values into the next immediates[] slot */
925          {
926             const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
927             float imm[4];
928             assert(size <= 4);
929             assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
930             for (chan = 0; chan < 4; ++chan) {
931                imm[chan] = 0.0f;
932             }
933             for (chan = 0; chan < size; ++chan) {
934                unsigned swizzle = bld.swizzles[chan];
935                imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
936             }
937             bld.immediates[num_immediates] =
938                      lp_build_const_aos(gallivm, type,
939                                         imm[0], imm[1], imm[2], imm[3],
940                                         NULL);
941             num_immediates++;
942          }
943          break;
944 
945       case TGSI_TOKEN_TYPE_PROPERTY:
946          break;
947 
948       default:
949          assert(0);
950       }
951    }
952 
953    while (pc != -1) {
954       struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
955       const struct tgsi_opcode_info *opcode_info =
956          tgsi_get_opcode_info(instr->Instruction.Opcode);
957       if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
958          _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
959                        tgsi_get_opcode_name(instr->Instruction.Opcode));
960    }
961 
962    if (0) {
963       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
964       LLVMValueRef function = LLVMGetBasicBlockParent(block);
965       debug_printf("11111111111111111111111111111 \n");
966       tgsi_dump(tokens, 0);
967       lp_debug_dump_value(function);
968       debug_printf("2222222222222222222222222222 \n");
969    }
970    tgsi_parse_free(&parse);
971    FREE(bld.bld_base.instructions);
972 
973    if (0) {
974       LLVMModuleRef module = LLVMGetGlobalParent(
975          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
976       LLVMDumpModule(module);
977    }
978 
979 }
980 
981