1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007-2008 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
31  * TGSI to LLVM IR translation -- SoA.
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  *
35  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36  * Brian Paul, and others.
37  */
38 
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_swizzle.h"
60 #include "lp_bld_flow.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_tgsi.h"
63 #include "lp_bld_limits.h"
64 #include "lp_bld_debug.h"
65 #include "lp_bld_printf.h"
66 #include "lp_bld_sample.h"
67 #include "lp_bld_struct.h"
68 
/*
 * Number of entries allocated for the function-context stack of an
 * execution mask.
 * SM 4.0 says that subroutines can nest 32 deep and
 * we need one more for our main function.
 */
#define LP_MAX_NUM_FUNCS 33

/*
 * Compile-time debug switch, disabled here. Presumably enables dumping of
 * geometry-shader emits; its users are outside this part of the file.
 */
#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results on every
 * TGSI instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0
84 
85 
86 /*
87  * Emit code to print a register value.
88  */
89 static void
emit_dump_reg(struct gallivm_state * gallivm,unsigned file,unsigned index,unsigned chan,LLVMValueRef value)90 emit_dump_reg(struct gallivm_state *gallivm,
91               unsigned file,
92               unsigned index,
93               unsigned chan,
94               LLVMValueRef value)
95 {
96    char buf[32];
97 
98    util_snprintf(buf, sizeof buf, "    %s[%u].%c = ",
99                  tgsi_file_name(file),
100                  index, "xyzw"[chan]);
101 
102    lp_build_print_value(gallivm, buf, value);
103 }
104 
105 /*
106  * Return the context for the current function.
107  * (always 'main', if shader doesn't do any function calls)
108  */
109 static inline struct function_ctx *
func_ctx(struct lp_exec_mask * mask)110 func_ctx(struct lp_exec_mask *mask)
111 {
112    assert(mask->function_stack_size > 0);
113    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
114    return &mask->function_stack[mask->function_stack_size - 1];
115 }
116 
117 /*
118  * Returns true if we're in a loop.
119  * It's global, meaning that it returns true even if there's
120  * no loop inside the current function, but we were inside
121  * a loop inside another function, from which this one was called.
122  */
123 static inline boolean
mask_has_loop(struct lp_exec_mask * mask)124 mask_has_loop(struct lp_exec_mask *mask)
125 {
126    int i;
127    for (i = mask->function_stack_size - 1; i >= 0; --i) {
128       const struct function_ctx *ctx = &mask->function_stack[i];
129       if (ctx->loop_stack_size > 0)
130          return TRUE;
131    }
132    return FALSE;
133 }
134 
135 /*
136  * Returns true if we're inside a switch statement.
137  * It's global, meaning that it returns true even if there's
138  * no switch in the current function, but we were inside
139  * a switch inside another function, from which this one was called.
140  */
141 static inline boolean
mask_has_switch(struct lp_exec_mask * mask)142 mask_has_switch(struct lp_exec_mask *mask)
143 {
144    int i;
145    for (i = mask->function_stack_size - 1; i >= 0; --i) {
146       const struct function_ctx *ctx = &mask->function_stack[i];
147       if (ctx->switch_stack_size > 0)
148          return TRUE;
149    }
150    return FALSE;
151 }
152 
153 /*
154  * Returns true if we're inside a conditional.
155  * It's global, meaning that it returns true even if there's
156  * no conditional in the current function, but we were inside
157  * a conditional inside another function, from which this one was called.
158  */
159 static inline boolean
mask_has_cond(struct lp_exec_mask * mask)160 mask_has_cond(struct lp_exec_mask *mask)
161 {
162    int i;
163    for (i = mask->function_stack_size - 1; i >= 0; --i) {
164       const struct function_ctx *ctx = &mask->function_stack[i];
165       if (ctx->cond_stack_size > 0)
166          return TRUE;
167    }
168    return FALSE;
169 }
170 
171 
172 /*
173  * Initialize a function context at the specified index.
174  */
175 static void
lp_exec_mask_function_init(struct lp_exec_mask * mask,int function_idx)176 lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
177 {
178    LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
179    LLVMBuilderRef builder = mask->bld->gallivm->builder;
180    struct function_ctx *ctx =  &mask->function_stack[function_idx];
181 
182    ctx->cond_stack_size = 0;
183    ctx->loop_stack_size = 0;
184    ctx->switch_stack_size = 0;
185 
186    if (function_idx == 0) {
187       ctx->ret_mask = mask->ret_mask;
188    }
189 
190    ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
191                                        int_type, "looplimiter");
192    LLVMBuildStore(
193       builder,
194       LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
195       ctx->loop_limiter);
196 }
197 
lp_exec_mask_init(struct lp_exec_mask * mask,struct lp_build_context * bld)198 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
199 {
200    mask->bld = bld;
201    mask->has_mask = FALSE;
202    mask->ret_in_main = FALSE;
203    /* For the main function */
204    mask->function_stack_size = 1;
205 
206    mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
207    mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
208          mask->cond_mask = mask->switch_mask =
209          LLVMConstAllOnes(mask->int_vec_type);
210 
211    mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
212                                  sizeof(mask->function_stack[0]));
213    lp_exec_mask_function_init(mask, 0);
214 }
215 
216 static void
lp_exec_mask_fini(struct lp_exec_mask * mask)217 lp_exec_mask_fini(struct lp_exec_mask *mask)
218 {
219    FREE(mask->function_stack);
220 }
221 
lp_exec_mask_update(struct lp_exec_mask * mask)222 static void lp_exec_mask_update(struct lp_exec_mask *mask)
223 {
224    LLVMBuilderRef builder = mask->bld->gallivm->builder;
225    boolean has_loop_mask = mask_has_loop(mask);
226    boolean has_cond_mask = mask_has_cond(mask);
227    boolean has_switch_mask = mask_has_switch(mask);
228    boolean has_ret_mask = mask->function_stack_size > 1 ||
229          mask->ret_in_main;
230 
231    if (has_loop_mask) {
232       /*for loops we need to update the entire mask at runtime */
233       LLVMValueRef tmp;
234       assert(mask->break_mask);
235       tmp = LLVMBuildAnd(builder,
236                          mask->cont_mask,
237                          mask->break_mask,
238                          "maskcb");
239       mask->exec_mask = LLVMBuildAnd(builder,
240                                      mask->cond_mask,
241                                      tmp,
242                                      "maskfull");
243    } else
244       mask->exec_mask = mask->cond_mask;
245 
246    if (has_switch_mask) {
247       mask->exec_mask = LLVMBuildAnd(builder,
248                                      mask->exec_mask,
249                                      mask->switch_mask,
250                                      "switchmask");
251    }
252 
253    if (has_ret_mask) {
254       mask->exec_mask = LLVMBuildAnd(builder,
255                                      mask->exec_mask,
256                                      mask->ret_mask,
257                                      "callmask");
258    }
259 
260    mask->has_mask = (has_cond_mask ||
261                      has_loop_mask ||
262                      has_switch_mask ||
263                      has_ret_mask);
264 }
265 
/*
 * Enter a conditional: save the current cond_mask on the conditional stack
 * of the current function and AND it with val.
 *
 * Once LP_MAX_TGSI_NESTING levels are in use, deeper levels are only
 * counted; nothing is saved and the masks stay untouched.
 */
static void
lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                       LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *fn = func_ctx(mask);
   LLVMValueRef outer_mask = mask->cond_mask;

   if (fn->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      fn->cond_stack_size++;
      return;
   }

   /* Outermost conditional of main: nothing can have narrowed the mask. */
   assert(fn->cond_stack_size != 0 ||
          mask->function_stack_size != 1 ||
          outer_mask == LLVMConstAllOnes(mask->int_vec_type));
   assert(LLVMTypeOf(val) == mask->int_vec_type);

   fn->cond_stack[fn->cond_stack_size] = outer_mask;
   fn->cond_stack_size++;

   mask->cond_mask = LLVMBuildAnd(builder, outer_mask, val, "");
   lp_exec_mask_update(mask);
}
287 
lp_exec_mask_cond_invert(struct lp_exec_mask * mask)288 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
289 {
290    LLVMBuilderRef builder = mask->bld->gallivm->builder;
291    struct function_ctx *ctx = func_ctx(mask);
292    LLVMValueRef prev_mask;
293    LLVMValueRef inv_mask;
294 
295    assert(ctx->cond_stack_size);
296    if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
297       return;
298    prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
299    if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
300       assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
301    }
302 
303    inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
304 
305    mask->cond_mask = LLVMBuildAnd(builder,
306                                   inv_mask,
307                                   prev_mask, "");
308    lp_exec_mask_update(mask);
309 }
310 
lp_exec_mask_cond_pop(struct lp_exec_mask * mask)311 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
312 {
313    struct function_ctx *ctx = func_ctx(mask);
314    assert(ctx->cond_stack_size);
315    --ctx->cond_stack_size;
316    if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
317       return;
318    mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
319    lp_exec_mask_update(mask);
320 }
321 
lp_exec_bgnloop(struct lp_exec_mask * mask)322 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
323 {
324    LLVMBuilderRef builder = mask->bld->gallivm->builder;
325    struct function_ctx *ctx = func_ctx(mask);
326 
327    if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
328       ++ctx->loop_stack_size;
329       return;
330    }
331 
332    ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
333       ctx->break_type;
334    ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
335 
336    ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
337    ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
338    ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
339    ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
340    ++ctx->loop_stack_size;
341 
342    ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
343    LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
344 
345    ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
346 
347    LLVMBuildBr(builder, ctx->loop_block);
348    LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
349 
350    mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
351 
352    lp_exec_mask_update(mask);
353 }
354 
lp_exec_break(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)355 static void lp_exec_break(struct lp_exec_mask *mask,
356                           struct lp_build_tgsi_context * bld_base)
357 {
358    LLVMBuilderRef builder = mask->bld->gallivm->builder;
359    struct function_ctx *ctx = func_ctx(mask);
360 
361    if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
362       LLVMValueRef exec_mask = LLVMBuildNot(builder,
363                                             mask->exec_mask,
364                                             "break");
365 
366       mask->break_mask = LLVMBuildAnd(builder,
367                                       mask->break_mask,
368                                       exec_mask, "break_full");
369    }
370    else {
371       unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
372       boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
373                               opcode == TGSI_OPCODE_CASE);
374 
375 
376       if (ctx->switch_in_default) {
377          /*
378           * stop default execution but only if this is an unconditional switch.
379           * (The condition here is not perfect since dead code after break is
380           * allowed but should be sufficient since false negatives are just
381           * unoptimized - so we don't have to pre-evaluate that).
382           */
383          if(break_always && ctx->switch_pc) {
384             bld_base->pc = ctx->switch_pc;
385             return;
386          }
387       }
388 
389       if (break_always) {
390          mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
391       }
392       else {
393          LLVMValueRef exec_mask = LLVMBuildNot(builder,
394                                                mask->exec_mask,
395                                                "break");
396          mask->switch_mask = LLVMBuildAnd(builder,
397                                           mask->switch_mask,
398                                           exec_mask, "break_switch");
399       }
400    }
401 
402    lp_exec_mask_update(mask);
403 }
404 
lp_exec_continue(struct lp_exec_mask * mask)405 static void lp_exec_continue(struct lp_exec_mask *mask)
406 {
407    LLVMBuilderRef builder = mask->bld->gallivm->builder;
408    LLVMValueRef exec_mask = LLVMBuildNot(builder,
409                                          mask->exec_mask,
410                                          "");
411 
412    mask->cont_mask = LLVMBuildAnd(builder,
413                                   mask->cont_mask,
414                                   exec_mask, "");
415 
416    lp_exec_mask_update(mask);
417 }
418 
419 
lp_exec_endloop(struct gallivm_state * gallivm,struct lp_exec_mask * mask)420 static void lp_exec_endloop(struct gallivm_state *gallivm,
421                             struct lp_exec_mask *mask)
422 {
423    LLVMBuilderRef builder = mask->bld->gallivm->builder;
424    struct function_ctx *ctx = func_ctx(mask);
425    LLVMBasicBlockRef endloop;
426    LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
427    LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
428                                                mask->bld->type.width *
429                                                mask->bld->type.length);
430    LLVMValueRef i1cond, i2cond, icond, limiter;
431 
432    assert(mask->break_mask);
433 
434 
435    assert(ctx->loop_stack_size);
436    if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
437       --ctx->loop_stack_size;
438       return;
439    }
440 
441    /*
442     * Restore the cont_mask, but don't pop
443     */
444    mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
445    lp_exec_mask_update(mask);
446 
447    /*
448     * Unlike the continue mask, the break_mask must be preserved across loop
449     * iterations
450     */
451    LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
452 
453    /* Decrement the loop limiter */
454    limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
455 
456    limiter = LLVMBuildSub(
457       builder,
458       limiter,
459       LLVMConstInt(int_type, 1, false),
460       "");
461 
462    LLVMBuildStore(builder, limiter, ctx->loop_limiter);
463 
464    /* i1cond = (mask != 0) */
465    i1cond = LLVMBuildICmp(
466       builder,
467       LLVMIntNE,
468       LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
469       LLVMConstNull(reg_type), "i1cond");
470 
471    /* i2cond = (looplimiter > 0) */
472    i2cond = LLVMBuildICmp(
473       builder,
474       LLVMIntSGT,
475       limiter,
476       LLVMConstNull(int_type), "i2cond");
477 
478    /* if( i1cond && i2cond ) */
479    icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
480 
481    endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
482 
483    LLVMBuildCondBr(builder,
484                    icond, ctx->loop_block, endloop);
485 
486    LLVMPositionBuilderAtEnd(builder, endloop);
487 
488    assert(ctx->loop_stack_size);
489    --ctx->loop_stack_size;
490    mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
491    mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
492    ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
493    ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
494    ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
495          ctx->switch_stack_size];
496 
497    lp_exec_mask_update(mask);
498 }
499 
/*
 * Open a switch statement on switchval.
 *
 * Saves the enclosing switch state and break type, then starts the new
 * switch with an all-zero switch_mask and default mask, not in default,
 * and with no deferred default pc.
 *
 * If the switch stack is full, or the loop stack has overflowed, the switch
 * is only counted.
 */
static void
lp_exec_switch(struct lp_exec_mask *mask,
               LLVMValueRef switchval)
{
   struct function_ctx *fn = func_ctx(mask);
   int slot;

   if (fn->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       fn->loop_stack_size > LP_MAX_TGSI_NESTING) {
      fn->switch_stack_size++;
      return;
   }

   slot = fn->switch_stack_size;

   /* BRK now refers to this switch; remember what it referred to before. */
   fn->break_type_stack[fn->loop_stack_size + slot] = fn->break_type;
   fn->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   fn->switch_stack[slot].switch_mask = mask->switch_mask;
   fn->switch_stack[slot].switch_val = fn->switch_val;
   fn->switch_stack[slot].switch_mask_default = fn->switch_mask_default;
   fn->switch_stack[slot].switch_in_default = fn->switch_in_default;
   fn->switch_stack[slot].switch_pc = fn->switch_pc;
   fn->switch_stack_size = slot + 1;

   /* No case has matched yet, so no lane executes until a CASE hits. */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   fn->switch_val = switchval;
   fn->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   fn->switch_in_default = false;
   fn->switch_pc = 0;

   lp_exec_mask_update(mask);
}
530 
lp_exec_endswitch(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)531 static void lp_exec_endswitch(struct lp_exec_mask *mask,
532                               struct lp_build_tgsi_context * bld_base)
533 {
534    LLVMBuilderRef builder = mask->bld->gallivm->builder;
535    struct function_ctx *ctx = func_ctx(mask);
536 
537    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
538       ctx->switch_stack_size--;
539       return;
540    }
541 
542    /* check if there's deferred default if so do it now */
543    if (ctx->switch_pc && !ctx->switch_in_default) {
544       LLVMValueRef prevmask, defaultmask;
545       unsigned tmp_pc;
546       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
547       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
548       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
549       ctx->switch_in_default = true;
550 
551       lp_exec_mask_update(mask);
552 
553       assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
554              TGSI_OPCODE_DEFAULT);
555 
556       tmp_pc = bld_base->pc;
557       bld_base->pc = ctx->switch_pc;
558       /*
559        * re-purpose switch_pc to point to here again, since we stop execution of
560        * the deferred default after next break.
561        */
562       ctx->switch_pc = tmp_pc - 1;
563 
564       return;
565    }
566 
567    else if (ctx->switch_pc && ctx->switch_in_default) {
568       assert(bld_base->pc == ctx->switch_pc + 1);
569    }
570 
571    ctx->switch_stack_size--;
572    mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
573    ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
574    ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
575    ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
576    ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
577 
578    ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
579 
580    lp_exec_mask_update(mask);
581 }
582 
/*
 * Handle CASE: lanes whose switch value equals caseval are added to the
 * switch mask (on top of lanes falling through from the previous case),
 * limited to the mask of the enclosing switch. The matched lanes are also
 * accumulated in switch_mask_default so that DEFAULT can later select
 * everything that matched no case.
 *
 * Nothing happens for overflowed switches or while executing a default
 * block.
 */
static void
lp_exec_case(struct lp_exec_mask *mask,
             LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *fn = func_ctx(mask);
   LLVMValueRef outer_mask;
   LLVMValueRef matched;

   if (fn->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (fn->switch_in_default) {
      return;
   }

   outer_mask = fn->switch_stack[fn->switch_stack_size - 1].switch_mask;
   matched = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, fn->switch_val);

   fn->switch_mask_default = LLVMBuildOr(builder, matched,
                                         fn->switch_mask_default,
                                         "sw_default_mask");

   matched = LLVMBuildOr(builder, matched, mask->switch_mask, "");
   mask->switch_mask = LLVMBuildAnd(builder, matched, outer_mask, "sw_mask");

   lp_exec_mask_update(mask);
}
607 
608 /*
609  * Analyse default statement in a switch.
610  * \return true if default is last statement, false otherwise
611  * \param default_pc_start contains pc of instruction to jump to
612  *                         if default wasn't last but there's no
613  *                         fallthrough into default.
614  */
default_analyse_is_last(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base,int * default_pc_start)615 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
616                                        struct lp_build_tgsi_context * bld_base,
617                                        int *default_pc_start)
618 {
619    unsigned pc = bld_base->pc;
620    struct function_ctx *ctx = func_ctx(mask);
621    int curr_switch_stack = ctx->switch_stack_size;
622 
623    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
624       return false;
625    }
626 
627    /* skip over case statements which are together with default */
628    while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
629       pc++;
630    }
631 
632    while (pc != ~0u && pc < bld_base->num_instructions) {
633       unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
634       switch (opcode) {
635       case TGSI_OPCODE_CASE:
636          if (curr_switch_stack == ctx->switch_stack_size) {
637             *default_pc_start = pc - 1;
638             return false;
639          }
640          break;
641       case TGSI_OPCODE_SWITCH:
642          curr_switch_stack++;
643          break;
644       case TGSI_OPCODE_ENDSWITCH:
645          if (curr_switch_stack == ctx->switch_stack_size) {
646             *default_pc_start = pc - 1;
647             return true;
648          }
649          curr_switch_stack--;
650          break;
651       }
652       pc++;
653    }
654    /* should never arrive here */
655    assert(0);
656    return true;
657 }
658 
lp_exec_default(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)659 static void lp_exec_default(struct lp_exec_mask *mask,
660                             struct lp_build_tgsi_context * bld_base)
661 {
662    LLVMBuilderRef builder = mask->bld->gallivm->builder;
663    struct function_ctx *ctx = func_ctx(mask);
664 
665    int default_exec_pc;
666    boolean default_is_last;
667 
668    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
669       return;
670    }
671 
672    /*
673     * This is a messy opcode, because it may not be always at the end and
674     * there can be fallthrough in and out of it.
675     */
676 
677    default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
678    /*
679     * If it is last statement in switch (note that case statements appearing
680     * "at the same time" as default don't change that) everything is just fine,
681     * update switch mask and go on. This means we can handle default with
682     * fallthrough INTO it without overhead, if it is last.
683     */
684    if (default_is_last) {
685       LLVMValueRef prevmask, defaultmask;
686       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
687       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
688       defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
689       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
690       ctx->switch_in_default = true;
691 
692       lp_exec_mask_update(mask);
693    }
694    else {
695       /*
696        * Technically, "case" immediately before default isn't really a
697        * fallthrough, however we still have to count them as such as we
698        * already have updated the masks.
699        * If that happens in practice could add a switch optimizer pass
700        * which just gets rid of all case statements appearing together with
701        * default (or could do switch analysis at switch start time instead).
702        */
703       unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
704       boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
705                          opcode != TGSI_OPCODE_SWITCH);
706       /*
707        * If it is not last statement and there was no fallthrough into it,
708        * we record the PC and continue execution at next case (again, those
709        * case encountered at the same time don't count). At endswitch
710        * time, we update switchmask, and go back executing the code we skipped
711        * until the next break (possibly re-executing some code with changed mask
712        * if there was a fallthrough out of default).
713        * Finally, if it is not last statement and there was a fallthrough into it,
714        * do the same as with the former case, except instead of skipping the code
715        * just execute it without updating the mask, then go back and re-execute.
716        */
717       ctx->switch_pc = bld_base->pc;
718       if (!ft_into) {
719          bld_base->pc = default_exec_pc;
720       }
721    }
722 }
723 
724 
725 /* stores val into an address pointed to by dst_ptr.
726  * mask->exec_mask is used to figure out which bits of val
727  * should be stored into the address
728  * (0 means don't store this bit, 1 means do store).
729  */
lp_exec_mask_store(struct lp_exec_mask * mask,struct lp_build_context * bld_store,LLVMValueRef val,LLVMValueRef dst_ptr)730 static void lp_exec_mask_store(struct lp_exec_mask *mask,
731                                struct lp_build_context *bld_store,
732                                LLVMValueRef val,
733                                LLVMValueRef dst_ptr)
734 {
735    LLVMBuilderRef builder = mask->bld->gallivm->builder;
736    LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;
737 
738    assert(lp_check_value(bld_store->type, val));
739    assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
740    assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
741 
742    if (exec_mask) {
743       LLVMValueRef res, dst;
744 
745       dst = LLVMBuildLoad(builder, dst_ptr, "");
746       res = lp_build_select(bld_store, exec_mask, val, dst);
747       LLVMBuildStore(builder, res, dst_ptr);
748    } else
749       LLVMBuildStore(builder, val, dst_ptr);
750 }
751 
lp_exec_mask_call(struct lp_exec_mask * mask,int func,int * pc)752 static void lp_exec_mask_call(struct lp_exec_mask *mask,
753                               int func,
754                               int *pc)
755 {
756    if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
757       return;
758    }
759 
760    lp_exec_mask_function_init(mask, mask->function_stack_size);
761    mask->function_stack[mask->function_stack_size].pc = *pc;
762    mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
763    mask->function_stack_size++;
764    *pc = func;
765 }
766 
lp_exec_mask_ret(struct lp_exec_mask * mask,int * pc)767 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
768 {
769    LLVMBuilderRef builder = mask->bld->gallivm->builder;
770    struct function_ctx *ctx = func_ctx(mask);
771    LLVMValueRef exec_mask;
772 
773    if (ctx->cond_stack_size == 0 &&
774        ctx->loop_stack_size == 0 &&
775        ctx->switch_stack_size == 0 &&
776        mask->function_stack_size == 1) {
777       /* returning from main() */
778       *pc = -1;
779       return;
780    }
781 
782    if (mask->function_stack_size == 1) {
783       /*
784        * This requires special handling since we need to ensure
785        * we don't drop the mask even if we have no call stack
786        * (e.g. after a ret in a if clause after the endif)
787        */
788       mask->ret_in_main = TRUE;
789    }
790 
791    exec_mask = LLVMBuildNot(builder,
792                             mask->exec_mask,
793                             "ret");
794 
795    mask->ret_mask = LLVMBuildAnd(builder,
796                                  mask->ret_mask,
797                                  exec_mask, "ret_full");
798 
799    lp_exec_mask_update(mask);
800 }
801 
/*
 * Named as the counterpart of lp_exec_mask_endsub(). The body is empty, so
 * no mask state is changed here.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
805 
lp_exec_mask_endsub(struct lp_exec_mask * mask,int * pc)806 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
807 {
808    struct function_ctx *ctx;
809 
810    assert(mask->function_stack_size > 1);
811    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
812 
813    ctx = func_ctx(mask);
814    mask->function_stack_size--;
815 
816    *pc = ctx->pc;
817    mask->ret_mask = ctx->ret_mask;
818 
819    lp_exec_mask_update(mask);
820 }
821 
822 
823 static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context * bld,unsigned file,int index,unsigned chan)824 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
825              unsigned file,
826              int index,
827              unsigned chan)
828 {
829    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
830    LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
831    LLVMValueRef var_of_array;
832 
833    switch (file) {
834    case TGSI_FILE_TEMPORARY:
835       array_of_vars = bld->temps;
836       var_of_array = bld->temps_array;
837       break;
838    case TGSI_FILE_OUTPUT:
839       array_of_vars = bld->outputs;
840       var_of_array = bld->outputs_array;
841       break;
842    default:
843       assert(0);
844       return NULL;
845    }
846 
847    assert(chan < 4);
848 
849    if (bld->indirect_files & (1 << file)) {
850       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
851       return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
852    }
853    else {
854       assert(index <= bld->bld_base.info->file_max[file]);
855       return array_of_vars[index][chan];
856    }
857 }
858 
859 
860 /**
861  * Return pointer to a temporary register channel (src or dest).
862  * Note that indirect addressing cannot be handled here.
863  * \param index  which temporary register
864  * \param chan  which channel of the temp register.
865  */
866 LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)867 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
868              unsigned index,
869              unsigned chan)
870 {
871    return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
872 }
873 
874 /**
875  * Return pointer to a output register channel (src or dest).
876  * Note that indirect addressing cannot be handled here.
877  * \param index  which output register
878  * \param chan  which channel of the output register.
879  */
880 LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)881 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
882                unsigned index,
883                unsigned chan)
884 {
885    return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
886 }
887 
888 /*
889  * If we have indirect addressing in outputs copy our alloca array
890  * to the outputs slots specified by the caller to make sure
891  * our outputs are delivered consistently via the same interface.
892  */
893 static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)894 gather_outputs(struct lp_build_tgsi_soa_context * bld)
895 {
896    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
897       unsigned index, chan;
898       assert(bld->bld_base.info->num_outputs <=
899              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
900       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
901          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
902             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
903          }
904       }
905    }
906 }
907 
908 /**
909  * Gather vector.
910  * XXX the lp_build_gather() function should be capable of doing this
911  * with a little work.
912  */
913 static LLVMValueRef
build_gather(struct lp_build_tgsi_context * bld_base,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)914 build_gather(struct lp_build_tgsi_context *bld_base,
915              LLVMValueRef base_ptr,
916              LLVMValueRef indexes,
917              LLVMValueRef overflow_mask,
918              LLVMValueRef indexes2)
919 {
920    struct gallivm_state *gallivm = bld_base->base.gallivm;
921    LLVMBuilderRef builder = gallivm->builder;
922    struct lp_build_context *uint_bld = &bld_base->uint_bld;
923    struct lp_build_context *bld = &bld_base->base;
924    LLVMValueRef res;
925    unsigned i;
926 
927    if (indexes2)
928       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
929    else
930       res = bld->undef;
931    /*
932     * overflow_mask is a vector telling us which channels
933     * in the vector overflowed. We use the overflow behavior for
934     * constant buffers which is defined as:
935     * Out of bounds access to constant buffer returns 0 in all
936     * components. Out of bounds behavior is always with respect
937     * to the size of the buffer bound at that slot.
938     */
939 
940    if (overflow_mask) {
941       /*
942        * We avoid per-element control flow here (also due to llvm going crazy,
943        * though I suspect it's better anyway since overflow is likely rare).
944        * Note that since we still fetch from buffers even if num_elements was
945        * zero (in this case we'll fetch from index zero) the jit func callers
946        * MUST provide valid fake constant buffers of size 4x32 (the values do
947        * not matter), otherwise we'd still need (not per element though)
948        * control flow.
949        */
950       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
951       if (indexes2)
952          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
953    }
954 
955    /*
956     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
957     */
958    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
959       LLVMValueRef si, di;
960       LLVMValueRef index;
961       LLVMValueRef scalar_ptr, scalar;
962 
963       di = lp_build_const_int32(bld->gallivm, i);
964       if (indexes2)
965          si = lp_build_const_int32(bld->gallivm, i >> 1);
966       else
967          si = di;
968 
969       if (indexes2 && (i & 1)) {
970          index = LLVMBuildExtractElement(builder,
971                                          indexes2, si, "");
972       } else {
973          index = LLVMBuildExtractElement(builder,
974                                          indexes, si, "");
975       }
976       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
977                                 &index, 1, "gather_ptr");
978       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
979 
980       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
981    }
982 
983    if (overflow_mask) {
984       if (indexes2) {
985          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
986          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
987                                        bld_base->dbl_bld.int_vec_type, "");
988          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
989                                bld_base->dbl_bld.zero, res);
990       } else
991          res = lp_build_select(bld, overflow_mask, bld->zero, res);
992    }
993 
994    return res;
995 }
996 
997 
998 /**
999  * Scatter/store vector.
1000  */
1001 static void
emit_mask_scatter(struct lp_build_tgsi_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)1002 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1003                   LLVMValueRef base_ptr,
1004                   LLVMValueRef indexes,
1005                   LLVMValueRef values,
1006                   struct lp_exec_mask *mask)
1007 {
1008    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1009    LLVMBuilderRef builder = gallivm->builder;
1010    unsigned i;
1011    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
1012 
1013    /*
1014     * Loop over elements of index_vec, store scalar value.
1015     */
1016    for (i = 0; i < bld->bld_base.base.type.length; i++) {
1017       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1018       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1019       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1020       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1021       LLVMValueRef scalar_pred = pred ?
1022          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1023 
1024       if (0)
1025          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1026                          ii, val, index, scalar_ptr);
1027 
1028       if (scalar_pred) {
1029          LLVMValueRef real_val, dst_val;
1030          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1031          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1032          LLVMBuildStore(builder, real_val, scalar_ptr);
1033       }
1034       else {
1035          LLVMBuildStore(builder, val, scalar_ptr);
1036       }
1037    }
1038 }
1039 
1040 
1041 /**
1042  * Read the current value of the ADDR register, convert the floats to
1043  * ints, add the base index and return the vector of offsets.
1044  * The offsets will be used to index into the constant buffer or
1045  * temporary register file.
1046  */
1047 static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context * bld,unsigned reg_file,unsigned reg_index,const struct tgsi_ind_register * indirect_reg)1048 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
1049                    unsigned reg_file, unsigned reg_index,
1050                    const struct tgsi_ind_register *indirect_reg)
1051 {
1052    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1053    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1054    /* always use X component of address register */
1055    unsigned swizzle = indirect_reg->Swizzle;
1056    LLVMValueRef base;
1057    LLVMValueRef rel;
1058    LLVMValueRef max_index;
1059    LLVMValueRef index;
1060 
1061    assert(bld->indirect_files & (1 << reg_file));
1062 
1063    base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
1064 
1065    assert(swizzle < 4);
1066    switch (indirect_reg->File) {
1067    case TGSI_FILE_ADDRESS:
1068       rel = LLVMBuildLoad(builder,
1069                           bld->addr[indirect_reg->Index][swizzle],
1070                           "load addr reg");
1071       /* ADDR LLVM values already have LLVM integer type. */
1072       break;
1073    case TGSI_FILE_TEMPORARY:
1074       rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
1075       rel = LLVMBuildLoad(builder, rel, "load temp reg");
1076       /* TEMP LLVM values always have LLVM float type, but for indirection, the
1077        * value actually stored is expected to be an integer */
1078       rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
1079       break;
1080    default:
1081       assert(0);
1082       rel = uint_bld->zero;
1083    }
1084 
1085    index = lp_build_add(uint_bld, base, rel);
1086 
1087    /*
1088     * emit_fetch_constant handles constant buffer overflow so this code
1089     * is pointless for them.
1090     * Furthermore the D3D10 spec in section 6.5 says:
1091     * If the constant buffer bound to a slot is larger than the size
1092     * declared in the shader for that slot, implementations are allowed
1093     * to return incorrect data (not necessarily 0) for indices that are
1094     * larger than the declared size but smaller than the buffer size.
1095     */
1096    if (reg_file != TGSI_FILE_CONSTANT) {
1097       max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
1098                                          uint_bld->type,
1099                                          bld->bld_base.info->file_max[reg_file]);
1100 
1101       assert(!uint_bld->type.sign);
1102       index = lp_build_min(uint_bld, index, max_index);
1103    }
1104 
1105    return index;
1106 }
1107 
1108 static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype)1109 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1110 	       enum tgsi_opcode_type stype)
1111 {
1112    struct lp_build_context *bld_fetch;
1113 
1114    switch (stype) {
1115    case TGSI_TYPE_FLOAT:
1116    case TGSI_TYPE_UNTYPED:
1117       bld_fetch = &bld_base->base;
1118       break;
1119    case TGSI_TYPE_UNSIGNED:
1120       bld_fetch = &bld_base->uint_bld;
1121       break;
1122    case TGSI_TYPE_SIGNED:
1123       bld_fetch = &bld_base->int_bld;
1124       break;
1125    case TGSI_TYPE_DOUBLE:
1126       bld_fetch = &bld_base->dbl_bld;
1127       break;
1128    case TGSI_TYPE_UNSIGNED64:
1129       bld_fetch = &bld_base->uint64_bld;
1130       break;
1131    case TGSI_TYPE_SIGNED64:
1132       bld_fetch = &bld_base->int64_bld;
1133       break;
1134    case TGSI_TYPE_VOID:
1135    default:
1136       assert(0);
1137       bld_fetch = NULL;
1138       break;
1139    }
1140    return bld_fetch;
1141 }
1142 
1143 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,unsigned chan_index,boolean need_perelement_offset)1144 get_soa_array_offsets(struct lp_build_context *uint_bld,
1145                       LLVMValueRef indirect_index,
1146                       unsigned chan_index,
1147                       boolean need_perelement_offset)
1148 {
1149    struct gallivm_state *gallivm = uint_bld->gallivm;
1150    LLVMValueRef chan_vec =
1151       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1152    LLVMValueRef length_vec =
1153       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1154    LLVMValueRef index_vec;
1155 
1156    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1157    index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1158    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1159    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1160 
1161    if (need_perelement_offset) {
1162       LLVMValueRef pixel_offsets;
1163       unsigned i;
1164      /* build pixel offset vector: {0, 1, 2, 3, ...} */
1165       pixel_offsets = uint_bld->undef;
1166       for (i = 0; i < uint_bld->type.length; i++) {
1167          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1168          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1169                                                 ii, ii, "");
1170       }
1171       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1172    }
1173    return index_vec;
1174 }
1175 
1176 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)1177 emit_fetch_constant(
1178    struct lp_build_tgsi_context * bld_base,
1179    const struct tgsi_full_src_register * reg,
1180    enum tgsi_opcode_type stype,
1181    unsigned swizzle)
1182 {
1183    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1184    struct gallivm_state *gallivm = bld_base->base.gallivm;
1185    LLVMBuilderRef builder = gallivm->builder;
1186    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1187    unsigned dimension = 0;
1188    LLVMValueRef consts_ptr;
1189    LLVMValueRef num_consts;
1190    LLVMValueRef res;
1191 
1192    /* XXX: Handle fetching xyzw components as a vector */
1193    assert(swizzle != ~0u);
1194 
1195    if (reg->Register.Dimension) {
1196       assert(!reg->Dimension.Indirect);
1197       dimension = reg->Dimension.Index;
1198       assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
1199    }
1200 
1201    consts_ptr = bld->consts[dimension];
1202    num_consts = bld->consts_sizes[dimension];
1203 
1204    if (reg->Register.Indirect) {
1205       LLVMValueRef indirect_index;
1206       LLVMValueRef swizzle_vec =
1207          lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1208       LLVMValueRef index_vec;  /* index into the const buffer */
1209       LLVMValueRef overflow_mask;
1210       LLVMValueRef index_vec2 = NULL;
1211 
1212       indirect_index = get_indirect_index(bld,
1213                                           reg->Register.File,
1214                                           reg->Register.Index,
1215                                           &reg->Indirect);
1216 
1217       /* All fetches are from the same constant buffer, so
1218        * we need to propagate the size to a vector to do a
1219        * vector comparison */
1220       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1221       /* Construct a boolean vector telling us which channels
1222        * overflow the bound constant buffer */
1223       overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1224                                        indirect_index, num_consts);
1225 
1226       /* index_vec = indirect_index * 4 + swizzle */
1227       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1228       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1229 
1230       if (tgsi_type_is_64bit(stype)) {
1231          LLVMValueRef swizzle_vec2;
1232          swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
1233          index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
1234          index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
1235       }
1236       /* Gather values from the constant buffer */
1237       res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
1238    }
1239    else {
1240       LLVMValueRef index;  /* index into the const buffer */
1241       LLVMValueRef scalar, scalar_ptr;
1242       struct lp_build_context *bld_broad = &bld_base->base;
1243       index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1244 
1245       scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
1246                                 &index, 1, "");
1247       if (stype == TGSI_TYPE_DOUBLE) {
1248          LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
1249          scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
1250          bld_broad = &bld_base->dbl_bld;
1251       } else if (stype == TGSI_TYPE_UNSIGNED64) {
1252          LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1253          scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
1254          bld_broad = &bld_base->uint64_bld;
1255       } else if (stype == TGSI_TYPE_SIGNED64) {
1256          LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1257          scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
1258          bld_broad = &bld_base->int64_bld;
1259       }
1260       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1261       res = lp_build_broadcast_scalar(bld_broad, scalar);
1262    }
1263 
1264    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
1265       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1266       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1267    }
1268 
1269    return res;
1270 }
1271 
1272 /**
1273  * Fetch 64-bit values from two separate channels.
1274  * 64-bit values are stored split across two channels, like xy and zw.
1275  * This function creates a set of vec_length*2 floats,
1276  * extracts the values from the two channels,
1277  * puts them in the correct place, then casts to vec_length 64-bits.
1278  */
1279 static LLVMValueRef
emit_fetch_64bit(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype,LLVMValueRef input,LLVMValueRef input2)1280 emit_fetch_64bit(
1281    struct lp_build_tgsi_context * bld_base,
1282    enum tgsi_opcode_type stype,
1283    LLVMValueRef input,
1284    LLVMValueRef input2)
1285 {
1286    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1287    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1288    LLVMBuilderRef builder = gallivm->builder;
1289    LLVMValueRef res;
1290    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1291    int i;
1292    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
1293    int len = bld_base->base.type.length * 2;
1294    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
1295 
1296    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
1297       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
1298       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
1299    }
1300    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
1301 
1302    return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1303 }
1304 
1305 static LLVMValueRef
emit_fetch_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)1306 emit_fetch_immediate(
1307    struct lp_build_tgsi_context * bld_base,
1308    const struct tgsi_full_src_register * reg,
1309    enum tgsi_opcode_type stype,
1310    unsigned swizzle)
1311 {
1312    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1313    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1314    LLVMBuilderRef builder = gallivm->builder;
1315    LLVMValueRef res = NULL;
1316 
1317    if (bld->use_immediates_array || reg->Register.Indirect) {
1318       LLVMValueRef imms_array;
1319       LLVMTypeRef fptr_type;
1320 
1321       /* cast imms_array pointer to float* */
1322       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1323       imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
1324 
1325       if (reg->Register.Indirect) {
1326          LLVMValueRef indirect_index;
1327          LLVMValueRef index_vec;  /* index into the immediate register array */
1328          LLVMValueRef index_vec2 = NULL;
1329          indirect_index = get_indirect_index(bld,
1330                                              reg->Register.File,
1331                                              reg->Register.Index,
1332                                              &reg->Indirect);
1333          /*
1334           * Unlike for other reg classes, adding pixel offsets is unnecessary -
1335           * immediates are stored as full vectors (FIXME??? - might be better
1336           * to store them the same as constants) but all elements are the same
1337           * in any case.
1338           */
1339          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1340                                            indirect_index,
1341                                            swizzle,
1342                                            FALSE);
1343          if (tgsi_type_is_64bit(stype))
1344             index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1345                                               indirect_index,
1346                                               swizzle + 1,
1347                                               FALSE);
1348          /* Gather values from the immediate register array */
1349          res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1350       } else {
1351          LLVMValueRef lindex = lp_build_const_int32(gallivm,
1352                                         reg->Register.Index * 4 + swizzle);
1353          LLVMValueRef imms_ptr =  LLVMBuildGEP(builder,
1354                                                 bld->imms_array, &lindex, 1, "");
1355          res = LLVMBuildLoad(builder, imms_ptr, "");
1356 
1357          if (tgsi_type_is_64bit(stype)) {
1358             LLVMValueRef lindex1;
1359             LLVMValueRef imms_ptr2;
1360             LLVMValueRef res2;
1361 
1362             lindex1 = lp_build_const_int32(gallivm,
1363                                            reg->Register.Index * 4 + swizzle + 1);
1364             imms_ptr2 = LLVMBuildGEP(builder,
1365                                       bld->imms_array, &lindex1, 1, "");
1366             res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1367             res = emit_fetch_64bit(bld_base, stype, res, res2);
1368          }
1369       }
1370    }
1371    else {
1372       res = bld->immediates[reg->Register.Index][swizzle];
1373       if (tgsi_type_is_64bit(stype))
1374          res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
1375    }
1376 
1377    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1378       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1379       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1380    }
1381    return res;
1382 }
1383 
1384 static LLVMValueRef
emit_fetch_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)1385 emit_fetch_input(
1386    struct lp_build_tgsi_context * bld_base,
1387    const struct tgsi_full_src_register * reg,
1388    enum tgsi_opcode_type stype,
1389    unsigned swizzle)
1390 {
1391    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1392    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1393    LLVMBuilderRef builder = gallivm->builder;
1394    LLVMValueRef res;
1395 
1396    if (reg->Register.Indirect) {
1397       LLVMValueRef indirect_index;
1398       LLVMValueRef index_vec;  /* index into the input reg array */
1399       LLVMValueRef index_vec2 = NULL;
1400       LLVMValueRef inputs_array;
1401       LLVMTypeRef fptr_type;
1402 
1403       indirect_index = get_indirect_index(bld,
1404                                           reg->Register.File,
1405                                           reg->Register.Index,
1406                                           &reg->Indirect);
1407 
1408       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1409                                         indirect_index,
1410                                         swizzle,
1411                                         TRUE);
1412       if (tgsi_type_is_64bit(stype)) {
1413          index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1414                                            indirect_index,
1415                                            swizzle + 1,
1416                                            TRUE);
1417       }
1418       /* cast inputs_array pointer to float* */
1419       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1420       inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1421 
1422       /* Gather values from the input register array */
1423       res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1424    } else {
1425       if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1426          LLVMValueRef lindex = lp_build_const_int32(gallivm,
1427                                         reg->Register.Index * 4 + swizzle);
1428          LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1429                                                bld->inputs_array, &lindex, 1, "");
1430 
1431          res = LLVMBuildLoad(builder, input_ptr, "");
1432          if (tgsi_type_is_64bit(stype)) {
1433             LLVMValueRef lindex1;
1434             LLVMValueRef input_ptr2;
1435             LLVMValueRef res2;
1436 
1437             lindex1 = lp_build_const_int32(gallivm,
1438                                            reg->Register.Index * 4 + swizzle + 1);
1439             input_ptr2 = LLVMBuildGEP(builder,
1440                                       bld->inputs_array, &lindex1, 1, "");
1441             res2 = LLVMBuildLoad(builder, input_ptr2, "");
1442             res = emit_fetch_64bit(bld_base, stype, res, res2);
1443          }
1444       }
1445       else {
1446          res = bld->inputs[reg->Register.Index][swizzle];
1447          if (tgsi_type_is_64bit(stype))
1448             res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
1449       }
1450    }
1451 
1452    assert(res);
1453 
1454    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1455       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1456       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1457    }
1458 
1459    return res;
1460 }
1461 
1462 
1463 static LLVMValueRef
emit_fetch_gs_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)1464 emit_fetch_gs_input(
1465    struct lp_build_tgsi_context * bld_base,
1466    const struct tgsi_full_src_register * reg,
1467    enum tgsi_opcode_type stype,
1468    unsigned swizzle)
1469 {
1470    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1471    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1472    const struct tgsi_shader_info *info = bld->bld_base.info;
1473    LLVMBuilderRef builder = gallivm->builder;
1474    LLVMValueRef attrib_index = NULL;
1475    LLVMValueRef vertex_index = NULL;
1476    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1477    LLVMValueRef res;
1478 
1479    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1480       /* This is really a system value not a regular input */
1481       assert(!reg->Register.Indirect);
1482       assert(!reg->Dimension.Indirect);
1483       res = bld->system_values.prim_id;
1484       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1485          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1486       }
1487       return res;
1488    }
1489 
1490    if (reg->Register.Indirect) {
1491       attrib_index = get_indirect_index(bld,
1492                                         reg->Register.File,
1493                                         reg->Register.Index,
1494                                         &reg->Indirect);
1495    } else {
1496       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1497    }
1498 
1499    if (reg->Dimension.Indirect) {
1500       vertex_index = get_indirect_index(bld,
1501                                         reg->Register.File,
1502                                         reg->Dimension.Index,
1503                                         &reg->DimIndirect);
1504    } else {
1505       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1506    }
1507 
1508    res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1509                                     reg->Dimension.Indirect,
1510                                     vertex_index,
1511                                     reg->Register.Indirect,
1512                                     attrib_index,
1513                                     swizzle_index);
1514 
1515    assert(res);
1516    if (tgsi_type_is_64bit(stype)) {
1517       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
1518       LLVMValueRef res2;
1519       res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1520                                         reg->Dimension.Indirect,
1521                                         vertex_index,
1522                                         reg->Register.Indirect,
1523                                         attrib_index,
1524                                         swizzle_index);
1525       assert(res2);
1526       res = emit_fetch_64bit(bld_base, stype, res, res2);
1527    } else if (stype == TGSI_TYPE_UNSIGNED) {
1528       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1529    } else if (stype == TGSI_TYPE_SIGNED) {
1530       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1531    }
1532 
1533    return res;
1534 }
1535 
1536 static LLVMValueRef
emit_fetch_temporary(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)1537 emit_fetch_temporary(
1538    struct lp_build_tgsi_context * bld_base,
1539    const struct tgsi_full_src_register * reg,
1540    enum tgsi_opcode_type stype,
1541    unsigned swizzle)
1542 {
1543    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1544    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1545    LLVMBuilderRef builder = gallivm->builder;
1546    LLVMValueRef res;
1547 
1548    if (reg->Register.Indirect) {
1549       LLVMValueRef indirect_index;
1550       LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
1551       LLVMValueRef temps_array;
1552       LLVMTypeRef fptr_type;
1553 
1554       indirect_index = get_indirect_index(bld,
1555                                           reg->Register.File,
1556                                           reg->Register.Index,
1557                                           &reg->Indirect);
1558 
1559       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1560                                         indirect_index,
1561                                         swizzle,
1562                                         TRUE);
1563       if (tgsi_type_is_64bit(stype)) {
1564                index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1565                                                   indirect_index,
1566                                                   swizzle + 1,
1567                                                   TRUE);
1568       }
1569 
1570       /* cast temps_array pointer to float* */
1571       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1572       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1573 
1574       /* Gather values from the temporary register array */
1575       res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1576    }
1577    else {
1578       LLVMValueRef temp_ptr;
1579       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1580       res = LLVMBuildLoad(builder, temp_ptr, "");
1581 
1582       if (tgsi_type_is_64bit(stype)) {
1583          LLVMValueRef temp_ptr2, res2;
1584 
1585          temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
1586          res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1587          res = emit_fetch_64bit(bld_base, stype, res, res2);
1588       }
1589    }
1590 
1591    if (stype == TGSI_TYPE_SIGNED ||
1592        stype == TGSI_TYPE_UNSIGNED ||
1593        stype == TGSI_TYPE_DOUBLE ||
1594        stype == TGSI_TYPE_SIGNED64 ||
1595        stype == TGSI_TYPE_UNSIGNED64) {
1596       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1597       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1598    }
1599 
1600    return res;
1601 }
1602 
1603 static LLVMValueRef
emit_fetch_system_value(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)1604 emit_fetch_system_value(
1605    struct lp_build_tgsi_context * bld_base,
1606    const struct tgsi_full_src_register * reg,
1607    enum tgsi_opcode_type stype,
1608    unsigned swizzle)
1609 {
1610    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1611    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1612    const struct tgsi_shader_info *info = bld->bld_base.info;
1613    LLVMBuilderRef builder = gallivm->builder;
1614    LLVMValueRef res;
1615    enum tgsi_opcode_type atype; // Actual type of the value
1616 
1617    assert(!reg->Register.Indirect);
1618 
1619    switch (info->system_value_semantic_name[reg->Register.Index]) {
1620    case TGSI_SEMANTIC_INSTANCEID:
1621       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1622       atype = TGSI_TYPE_UNSIGNED;
1623       break;
1624 
1625    case TGSI_SEMANTIC_VERTEXID:
1626       res = bld->system_values.vertex_id;
1627       atype = TGSI_TYPE_UNSIGNED;
1628       break;
1629 
1630    case TGSI_SEMANTIC_VERTEXID_NOBASE:
1631       res = bld->system_values.vertex_id_nobase;
1632       atype = TGSI_TYPE_UNSIGNED;
1633       break;
1634 
1635    case TGSI_SEMANTIC_BASEVERTEX:
1636       res = bld->system_values.basevertex;
1637       atype = TGSI_TYPE_UNSIGNED;
1638       break;
1639 
1640    case TGSI_SEMANTIC_PRIMID:
1641       res = bld->system_values.prim_id;
1642       atype = TGSI_TYPE_UNSIGNED;
1643       break;
1644 
1645    case TGSI_SEMANTIC_INVOCATIONID:
1646       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1647       atype = TGSI_TYPE_UNSIGNED;
1648       break;
1649 
1650    default:
1651       assert(!"unexpected semantic in emit_fetch_system_value");
1652       res = bld_base->base.zero;
1653       atype = TGSI_TYPE_FLOAT;
1654       break;
1655    }
1656 
1657    if (atype != stype) {
1658       if (stype == TGSI_TYPE_FLOAT) {
1659          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1660       } else if (stype == TGSI_TYPE_UNSIGNED) {
1661          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1662       } else if (stype == TGSI_TYPE_SIGNED) {
1663          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1664       }
1665    }
1666 
1667    return res;
1668 }
1669 
1670 /**
1671  * Register fetch with derivatives.
1672  */
1673 static void
emit_fetch_deriv(struct lp_build_tgsi_soa_context * bld,LLVMValueRef src,LLVMValueRef * res,LLVMValueRef * ddx,LLVMValueRef * ddy)1674 emit_fetch_deriv(
1675    struct lp_build_tgsi_soa_context *bld,
1676    LLVMValueRef src,
1677    LLVMValueRef *res,
1678    LLVMValueRef *ddx,
1679    LLVMValueRef *ddy)
1680 {
1681    if (res)
1682       *res = src;
1683 
1684    /* TODO: use interpolation coeffs for inputs */
1685 
1686    if (ddx)
1687       *ddx = lp_build_ddx(&bld->bld_base.base, src);
1688 
1689    if (ddy)
1690       *ddy = lp_build_ddy(&bld->bld_base.base, src);
1691 }
1692 
1693 /**
1694  * store an array of vec-length 64-bit into two arrays of vec_length floats
1695  * i.e.
1696  * value is d0, d1, d2, d3 etc.
1697  * each 64-bit has high and low pieces x, y
1698  * so gets stored into the separate channels as:
1699  * chan_ptr = d0.x, d1.x, d2.x, d3.x
1700  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1701  */
1702 static void
emit_store_64bit_chan(struct lp_build_tgsi_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)1703 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1704                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1705                       LLVMValueRef value)
1706 {
1707    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1708    struct gallivm_state *gallivm = bld_base->base.gallivm;
1709    LLVMBuilderRef builder = gallivm->builder;
1710    struct lp_build_context *float_bld = &bld_base->base;
1711    unsigned i;
1712    LLVMValueRef temp, temp2;
1713    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1714    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1715 
1716    for (i = 0; i < bld_base->base.type.length; i++) {
1717       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1718       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1719    }
1720 
1721    temp = LLVMBuildShuffleVector(builder, value,
1722                                  LLVMGetUndef(LLVMTypeOf(value)),
1723                                  LLVMConstVector(shuffles,
1724                                                  bld_base->base.type.length),
1725                                  "");
1726    temp2 = LLVMBuildShuffleVector(builder, value,
1727                                   LLVMGetUndef(LLVMTypeOf(value)),
1728                                   LLVMConstVector(shuffles2,
1729                                                   bld_base->base.type.length),
1730                                   "");
1731 
1732    lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1733    lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1734 }
1735 
1736 /**
1737  * Register store.
1738  */
1739 static void
emit_store_chan(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned index,unsigned chan_index,LLVMValueRef value)1740 emit_store_chan(
1741    struct lp_build_tgsi_context *bld_base,
1742    const struct tgsi_full_instruction *inst,
1743    unsigned index,
1744    unsigned chan_index,
1745    LLVMValueRef value)
1746 {
1747    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1748    struct gallivm_state *gallivm = bld_base->base.gallivm;
1749    LLVMBuilderRef builder = gallivm->builder;
1750    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1751    struct lp_build_context *float_bld = &bld_base->base;
1752    struct lp_build_context *int_bld = &bld_base->int_bld;
1753    LLVMValueRef indirect_index = NULL;
1754    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1755 
1756    /*
1757     * Apply saturation.
1758     *
1759     * It is always assumed to be float.
1760     */
1761    if (inst->Instruction.Saturate) {
1762       assert(dtype == TGSI_TYPE_FLOAT ||
1763              dtype == TGSI_TYPE_UNTYPED);
1764       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1765       value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1766    }
1767 
1768    if (reg->Register.Indirect) {
1769       /*
1770        * Currently the mesa/st doesn't generate indirect stores
1771        * to 64-bit values, it normally uses MOV to do indirect stores.
1772        */
1773       assert(!tgsi_type_is_64bit(dtype));
1774       indirect_index = get_indirect_index(bld,
1775                                           reg->Register.File,
1776                                           reg->Register.Index,
1777                                           &reg->Indirect);
1778    } else {
1779       assert(reg->Register.Index <=
1780                              bld_base->info->file_max[reg->Register.File]);
1781    }
1782 
1783    if (DEBUG_EXECUTION) {
1784       emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1785    }
1786 
1787    switch( reg->Register.File ) {
1788    case TGSI_FILE_OUTPUT:
1789       /* Outputs are always stored as floats */
1790       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1791 
1792       if (reg->Register.Indirect) {
1793          LLVMValueRef index_vec;  /* indexes into the output registers */
1794          LLVMValueRef outputs_array;
1795          LLVMTypeRef fptr_type;
1796 
1797          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1798                                            indirect_index,
1799                                            chan_index,
1800                                            TRUE);
1801 
1802          fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1803          outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1804 
1805          /* Scatter store values into output registers */
1806          emit_mask_scatter(bld, outputs_array, index_vec, value,
1807                            &bld->exec_mask);
1808       }
1809       else {
1810          LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1811                                                   chan_index);
1812 
1813          if (tgsi_type_is_64bit(dtype)) {
1814             LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1815                                                       chan_index + 1);
1816             emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1817                                   value);
1818          } else
1819             lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1820       }
1821       break;
1822 
1823    case TGSI_FILE_TEMPORARY:
1824       /* Temporaries are always stored as floats */
1825       if (!tgsi_type_is_64bit(dtype))
1826          value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1827       else
1828          value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1829 
1830       if (reg->Register.Indirect) {
1831          LLVMValueRef index_vec;  /* indexes into the temp registers */
1832          LLVMValueRef temps_array;
1833          LLVMTypeRef fptr_type;
1834 
1835          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1836                                            indirect_index,
1837                                            chan_index,
1838                                            TRUE);
1839 
1840          fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1841          temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1842 
1843          /* Scatter store values into temp registers */
1844          emit_mask_scatter(bld, temps_array, index_vec, value,
1845                            &bld->exec_mask);
1846       }
1847       else {
1848          LLVMValueRef temp_ptr;
1849          temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1850 
1851          if (tgsi_type_is_64bit(dtype)) {
1852             LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1853                                                          reg->Register.Index,
1854                                                          chan_index + 1);
1855             emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1856                                   value);
1857          }
1858          else
1859             lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1860       }
1861       break;
1862 
1863    case TGSI_FILE_ADDRESS:
1864       assert(dtype == TGSI_TYPE_SIGNED);
1865       assert(LLVMTypeOf(value) == int_bld->vec_type);
1866       value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1867       lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1868                          bld->addr[reg->Register.Index][chan_index]);
1869       break;
1870 
1871    default:
1872       assert( 0 );
1873    }
1874 
1875    (void)dtype;
1876 }
1877 
1878 /*
1879  * Called at the beginning of the translation of each TGSI instruction, to
1880  * emit some debug code.
1881  */
1882 static void
emit_debug(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info)1883 emit_debug(
1884    struct lp_build_tgsi_context * bld_base,
1885    const struct tgsi_full_instruction * inst,
1886    const struct tgsi_opcode_info * info)
1887 
1888 {
1889    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1890 
1891    if (DEBUG_EXECUTION) {
1892       /*
1893        * Dump the TGSI instruction.
1894        */
1895 
1896       struct gallivm_state *gallivm = bld_base->base.gallivm;
1897       char buf[512];
1898       buf[0] = '$';
1899       buf[1] = ' ';
1900       tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1901       lp_build_printf(gallivm, buf);
1902 
1903       /* Dump the execution mask.
1904        */
1905       if (bld->exec_mask.has_mask) {
1906          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
1907       }
1908    }
1909 }
1910 
1911 static void
emit_store(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,unsigned index,LLVMValueRef dst[4])1912 emit_store(
1913    struct lp_build_tgsi_context * bld_base,
1914    const struct tgsi_full_instruction * inst,
1915    const struct tgsi_opcode_info * info,
1916    unsigned index,
1917    LLVMValueRef dst[4])
1918 
1919 {
1920    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1921 
1922    unsigned writemask = inst->Dst[index].Register.WriteMask;
1923    while (writemask) {
1924       unsigned chan_index = u_bit_scan(&writemask);
1925       if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1926           continue;
1927       emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1928    }
1929 }
1930 
1931 static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)1932 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1933 {
1934    switch (tgsi_target) {
1935    case TGSI_TEXTURE_BUFFER:
1936       return PIPE_BUFFER;
1937    case TGSI_TEXTURE_1D:
1938    case TGSI_TEXTURE_SHADOW1D:
1939       return PIPE_TEXTURE_1D;
1940    case TGSI_TEXTURE_2D:
1941    case TGSI_TEXTURE_SHADOW2D:
1942    case TGSI_TEXTURE_2D_MSAA:
1943       return PIPE_TEXTURE_2D;
1944    case TGSI_TEXTURE_3D:
1945       return PIPE_TEXTURE_3D;
1946    case TGSI_TEXTURE_CUBE:
1947    case TGSI_TEXTURE_SHADOWCUBE:
1948       return PIPE_TEXTURE_CUBE;
1949    case TGSI_TEXTURE_RECT:
1950    case TGSI_TEXTURE_SHADOWRECT:
1951       return PIPE_TEXTURE_RECT;
1952    case TGSI_TEXTURE_1D_ARRAY:
1953    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1954       return PIPE_TEXTURE_1D_ARRAY;
1955    case TGSI_TEXTURE_2D_ARRAY:
1956    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1957    case TGSI_TEXTURE_2D_ARRAY_MSAA:
1958       return PIPE_TEXTURE_2D_ARRAY;
1959    case TGSI_TEXTURE_CUBE_ARRAY:
1960    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1961       return PIPE_TEXTURE_CUBE_ARRAY;
1962    default:
1963       assert(0);
1964       return PIPE_BUFFER;
1965    }
1966 }
1967 
1968 
1969 static enum lp_sampler_lod_property
lp_build_lod_property(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned src_op)1970 lp_build_lod_property(
1971    struct lp_build_tgsi_context *bld_base,
1972    const struct tgsi_full_instruction *inst,
1973    unsigned src_op)
1974 {
1975    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
1976    enum lp_sampler_lod_property lod_property;
1977 
1978    /*
1979     * Not much we can do here. We could try catching inputs declared
1980     * with constant interpolation but not sure it's worth it - since for
1981     * TEX opcodes as well as FETCH/LD the lod comes from same reg as
1982     * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
1983     * like the constant/immediate recognition below.
1984     * What seems to be of more value would be to recognize temps holding
1985     * broadcasted scalars but no way we can do it.
1986     * Tried asking llvm but without any success (using LLVMIsConstant
1987     * even though this isn't exactly what we'd need), even as simple as
1988     * IMM[0] UINT32 (0,-1,0,0)
1989     * MOV TEMP[0] IMM[0].yyyy
1990     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
1991     * doesn't work.
1992     * This means there's ZERO chance this will ever catch a scalar lod
1993     * with traditional tex opcodes as well as texel fetches, since the lod
1994     * comes from the same reg as coords (except some test shaders using
1995     * constant coords maybe).
1996     * There's at least hope for sample opcodes as well as size queries.
1997     */
1998    if (reg->Register.File == TGSI_FILE_CONSTANT ||
1999        reg->Register.File == TGSI_FILE_IMMEDIATE) {
2000       lod_property = LP_SAMPLER_LOD_SCALAR;
2001    }
2002    else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2003       if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2004          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2005       }
2006       else {
2007          lod_property = LP_SAMPLER_LOD_PER_QUAD;
2008       }
2009    }
2010    else {
2011       /* never use scalar (per-quad) lod the results are just too wrong. */
2012       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2013    }
2014    return lod_property;
2015 }
2016 
2017 
2018 /**
2019  * High-level instruction translators.
2020  */
2021 
2022 static void
emit_tex(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,LLVMValueRef * texel,unsigned sampler_reg,enum lp_sampler_op_type sampler_op)2023 emit_tex( struct lp_build_tgsi_soa_context *bld,
2024           const struct tgsi_full_instruction *inst,
2025           enum lp_build_tex_modifier modifier,
2026           LLVMValueRef *texel,
2027           unsigned sampler_reg,
2028           enum lp_sampler_op_type sampler_op)
2029 {
2030    unsigned unit = inst->Src[sampler_reg].Register.Index;
2031    LLVMValueRef oow = NULL;
2032    LLVMValueRef lod = NULL;
2033    LLVMValueRef coords[5];
2034    LLVMValueRef offsets[3] = { NULL };
2035    struct lp_derivatives derivs;
2036    struct lp_sampler_params params;
2037    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2038    unsigned num_derivs, num_offsets, i;
2039    unsigned shadow_coord = 0;
2040    unsigned layer_coord = 0;
2041    unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2042 
2043    memset(&params, 0, sizeof(params));
2044 
2045    if (!bld->sampler) {
2046       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2047       for (i = 0; i < 4; i++) {
2048          texel[i] = bld->bld_base.base.undef;
2049       }
2050       return;
2051    }
2052 
2053    switch (inst->Texture.Texture) {
2054    case TGSI_TEXTURE_1D_ARRAY:
2055       layer_coord = 1;
2056       /* fallthrough */
2057    case TGSI_TEXTURE_1D:
2058       num_offsets = 1;
2059       num_derivs = 1;
2060       break;
2061    case TGSI_TEXTURE_2D_ARRAY:
2062       layer_coord = 2;
2063       /* fallthrough */
2064    case TGSI_TEXTURE_2D:
2065    case TGSI_TEXTURE_RECT:
2066       num_offsets = 2;
2067       num_derivs = 2;
2068       break;
2069    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2070       layer_coord = 1;
2071       /* fallthrough */
2072    case TGSI_TEXTURE_SHADOW1D:
2073       shadow_coord = 2;
2074       num_offsets = 1;
2075       num_derivs = 1;
2076       break;
2077    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2078       layer_coord = 2;
2079       shadow_coord = 3;
2080       num_offsets = 2;
2081       num_derivs = 2;
2082       break;
2083    case TGSI_TEXTURE_SHADOW2D:
2084    case TGSI_TEXTURE_SHADOWRECT:
2085       shadow_coord = 2;
2086       num_offsets = 2;
2087       num_derivs = 2;
2088       break;
2089    case TGSI_TEXTURE_CUBE:
2090       num_offsets = 2;
2091       num_derivs = 3;
2092       break;
2093    case TGSI_TEXTURE_3D:
2094       num_offsets = 3;
2095       num_derivs = 3;
2096       break;
2097    case TGSI_TEXTURE_SHADOWCUBE:
2098       shadow_coord = 3;
2099       num_offsets = 2;
2100       num_derivs = 3;
2101       break;
2102    case TGSI_TEXTURE_CUBE_ARRAY:
2103       num_offsets = 2;
2104       num_derivs = 3;
2105       layer_coord = 3;
2106       break;
2107    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2108       num_offsets = 2;
2109       num_derivs = 3;
2110       layer_coord = 3;
2111       shadow_coord = 4; /* shadow coord special different reg */
2112       break;
2113    case TGSI_TEXTURE_2D_MSAA:
2114    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2115    default:
2116       assert(0);
2117       return;
2118    }
2119 
2120    /* Note lod and especially projected are illegal in a LOT of cases */
2121    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2122        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2123       if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2124           inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2125          /* note that shadow cube array with bias/explicit lod does not exist */
2126          lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2127       }
2128       else {
2129          lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2130       }
2131       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2132          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2133       }
2134       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2135          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2136       }
2137       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2138    }
2139 
2140    if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2141       oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2142       oow = lp_build_rcp(&bld->bld_base.base, oow);
2143    }
2144 
2145    for (i = 0; i < num_derivs; i++) {
2146       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2147       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2148          coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2149    }
2150    for (i = num_derivs; i < 5; i++) {
2151       coords[i] = bld->bld_base.base.undef;
2152    }
2153 
2154    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2155    if (layer_coord) {
2156       if (layer_coord == 3) {
2157          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2158       }
2159       else {
2160          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2161       }
2162       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2163          coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2164    }
2165    /* Shadow coord occupies always 5th slot. */
2166    if (shadow_coord) {
2167       sample_key |= LP_SAMPLER_SHADOW;
2168       if (shadow_coord == 4) {
2169          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2170       }
2171       else {
2172          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2173       }
2174       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2175          coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2176    }
2177 
2178    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2179       unsigned dim;
2180       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2181       for (dim = 0; dim < num_derivs; ++dim) {
2182          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2183          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2184       }
2185       params.derivs = &derivs;
2186       /*
2187        * could also check all src regs if constant but I doubt such
2188        * cases exist in practice.
2189        */
2190       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2191          if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2192             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2193          }
2194          else {
2195             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2196          }
2197       }
2198       else {
2199          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2200       }
2201    }
2202    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2203 
2204    /* we don't handle the 4 offset version of tg4 */
2205    if (inst->Texture.NumOffsets == 1) {
2206       unsigned dim;
2207       sample_key |= LP_SAMPLER_OFFSETS;
2208       for (dim = 0; dim < num_offsets; dim++) {
2209          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2210       }
2211    }
2212 
2213    params.type = bld->bld_base.base.type;
2214    params.sample_key = sample_key;
2215    params.texture_index = unit;
2216    params.sampler_index = unit;
2217    params.context_ptr = bld->context_ptr;
2218    params.thread_data_ptr = bld->thread_data_ptr;
2219    params.coords = coords;
2220    params.offsets = offsets;
2221    params.lod = lod;
2222    params.texel = texel;
2223 
2224    bld->sampler->emit_tex_sample(bld->sampler,
2225                                  bld->bld_base.base.gallivm,
2226                                  &params);
2227 }
2228 
2229 static void
emit_sample(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,boolean compare,enum lp_sampler_op_type sample_type,LLVMValueRef * texel)2230 emit_sample(struct lp_build_tgsi_soa_context *bld,
2231             const struct tgsi_full_instruction *inst,
2232             enum lp_build_tex_modifier modifier,
2233             boolean compare,
2234             enum lp_sampler_op_type sample_type,
2235             LLVMValueRef *texel)
2236 {
2237    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2238    unsigned texture_unit, sampler_unit;
2239    LLVMValueRef lod = NULL;
2240    LLVMValueRef coords[5];
2241    LLVMValueRef offsets[3] = { NULL };
2242    struct lp_derivatives derivs;
2243    struct lp_sampler_params params;
2244    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2245 
2246    unsigned num_offsets, num_derivs, i;
2247    unsigned layer_coord = 0;
2248    unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2249 
2250    memset(&params, 0, sizeof(params));
2251 
2252    if (!bld->sampler) {
2253       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2254       for (i = 0; i < 4; i++) {
2255          texel[i] = bld->bld_base.base.undef;
2256       }
2257       return;
2258    }
2259 
2260    /*
2261     * unlike old-style tex opcodes the texture/sampler indices
2262     * always come from src1 and src2 respectively.
2263     */
2264    texture_unit = inst->Src[1].Register.Index;
2265    sampler_unit = inst->Src[2].Register.Index;
2266 
2267    /*
2268     * Note inst->Texture.Texture will contain the number of offsets,
2269     * however the target information is NOT there and comes from the
2270     * declared sampler views instead.
2271     */
2272    switch (bld->sv[texture_unit].Resource) {
2273    case TGSI_TEXTURE_1D:
2274       num_offsets = 1;
2275       num_derivs = 1;
2276       break;
2277    case TGSI_TEXTURE_1D_ARRAY:
2278       layer_coord = 1;
2279       num_offsets = 1;
2280       num_derivs = 1;
2281       break;
2282    case TGSI_TEXTURE_2D:
2283    case TGSI_TEXTURE_RECT:
2284       num_offsets = 2;
2285       num_derivs = 2;
2286       break;
2287    case TGSI_TEXTURE_2D_ARRAY:
2288       layer_coord = 2;
2289       num_offsets = 2;
2290       num_derivs = 2;
2291       break;
2292    case TGSI_TEXTURE_CUBE:
2293       num_offsets = 2;
2294       num_derivs = 3;
2295       break;
2296    case TGSI_TEXTURE_3D:
2297       num_offsets = 3;
2298       num_derivs = 3;
2299       break;
2300    case TGSI_TEXTURE_CUBE_ARRAY:
2301       layer_coord = 3;
2302       num_offsets = 2;
2303       num_derivs = 3;
2304       break;
2305    default:
2306       assert(0);
2307       return;
2308    }
2309 
2310    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2311        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2312       lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2313       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2314          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2315       }
2316       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2317          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2318       }
2319       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2320    }
2321    else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2322       /* XXX might be better to explicitly pass the level zero information */
2323       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2324       lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2325    }
2326 
2327    for (i = 0; i < num_derivs; i++) {
2328       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2329    }
2330    for (i = num_derivs; i < 5; i++) {
2331       coords[i] = bld->bld_base.base.undef;
2332    }
2333 
2334    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2335    if (layer_coord) {
2336       if (layer_coord == 3)
2337          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2338       else
2339          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2340    }
2341    /* Shadow coord occupies always 5th slot. */
2342    if (compare) {
2343       sample_key |= LP_SAMPLER_SHADOW;
2344       coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2345    }
2346 
2347    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2348       unsigned dim;
2349       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2350       for (dim = 0; dim < num_derivs; ++dim) {
2351          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2352          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2353       }
2354       params.derivs = &derivs;
2355       /*
2356        * could also check all src regs if constant but I doubt such
2357        * cases exist in practice.
2358        */
2359       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2360          if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2361             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2362          }
2363          else {
2364             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2365          }
2366       }
2367       else {
2368          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2369       }
2370    }
2371 
2372    /* some advanced gather instructions (txgo) would require 4 offsets */
2373    if (inst->Texture.NumOffsets == 1) {
2374       unsigned dim;
2375       sample_key |= LP_SAMPLER_OFFSETS;
2376       for (dim = 0; dim < num_offsets; dim++) {
2377          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2378       }
2379    }
2380    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2381 
2382    params.type = bld->bld_base.base.type;
2383    params.sample_key = sample_key;
2384    params.texture_index = texture_unit;
2385    params.sampler_index = sampler_unit;
2386    params.context_ptr = bld->context_ptr;
2387    params.thread_data_ptr = bld->thread_data_ptr;
2388    params.coords = coords;
2389    params.offsets = offsets;
2390    params.lod = lod;
2391    params.texel = texel;
2392 
2393    bld->sampler->emit_tex_sample(bld->sampler,
2394                                  bld->bld_base.base.gallivm,
2395                                  &params);
2396 
2397    if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2398        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2399        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2400        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2401       unsigned char swizzles[4];
2402       swizzles[0] = inst->Src[1].Register.SwizzleX;
2403       swizzles[1] = inst->Src[1].Register.SwizzleY;
2404       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2405       swizzles[3] = inst->Src[1].Register.SwizzleW;
2406 
2407       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2408    }
2409 }
2410 
2411 static void
emit_fetch_texels(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * texel,boolean is_samplei)2412 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2413                    const struct tgsi_full_instruction *inst,
2414                    LLVMValueRef *texel,
2415                    boolean is_samplei)
2416 {
2417    unsigned unit, target;
2418    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2419    LLVMValueRef explicit_lod = NULL;
2420    LLVMValueRef coords[5];
2421    LLVMValueRef offsets[3] = { NULL };
2422    struct lp_sampler_params params;
2423    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2424    unsigned dims, i;
2425    unsigned layer_coord = 0;
2426    unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2427 
2428    memset(&params, 0, sizeof(params));
2429 
2430    if (!bld->sampler) {
2431       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2432       for (i = 0; i < 4; i++) {
2433          texel[i] = coord_undef;
2434       }
2435       return;
2436    }
2437 
2438    unit = inst->Src[1].Register.Index;
2439 
2440    if (is_samplei) {
2441       target = bld->sv[unit].Resource;
2442    }
2443    else {
2444       target = inst->Texture.Texture;
2445    }
2446 
2447    switch (target) {
2448    case TGSI_TEXTURE_1D:
2449    case TGSI_TEXTURE_BUFFER:
2450       dims = 1;
2451       break;
2452    case TGSI_TEXTURE_1D_ARRAY:
2453       layer_coord = 1;
2454       dims = 1;
2455       break;
2456    case TGSI_TEXTURE_2D:
2457    case TGSI_TEXTURE_RECT:
2458    case TGSI_TEXTURE_2D_MSAA:
2459       dims = 2;
2460       break;
2461    case TGSI_TEXTURE_2D_ARRAY:
2462    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2463       layer_coord = 2;
2464       dims = 2;
2465       break;
2466    case TGSI_TEXTURE_3D:
2467       dims = 3;
2468       break;
2469    default:
2470       assert(0);
2471       return;
2472    }
2473 
2474    /* always have lod except for buffers and msaa targets ? */
2475    if (target != TGSI_TEXTURE_BUFFER &&
2476        target != TGSI_TEXTURE_2D_MSAA &&
2477        target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2478       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2479       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2480       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2481    }
2482    /*
2483     * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2484     * would be the sample index.
2485     */
2486 
2487    for (i = 0; i < dims; i++) {
2488       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2489    }
2490    /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2491    for (i = dims; i < 5; i++) {
2492       coords[i] = coord_undef;
2493    }
2494    if (layer_coord)
2495       coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2496 
2497    if (inst->Texture.NumOffsets == 1) {
2498       unsigned dim;
2499       sample_key |= LP_SAMPLER_OFFSETS;
2500       for (dim = 0; dim < dims; dim++) {
2501          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2502       }
2503    }
2504    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2505 
2506    params.type = bld->bld_base.base.type;
2507    params.sample_key = sample_key;
2508    params.texture_index = unit;
2509    /*
2510     * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2511     * and trigger some assertions with d3d10 where the sampler view number
2512     * can exceed this.
2513     */
2514    params.sampler_index = 0;
2515    params.context_ptr = bld->context_ptr;
2516    params.thread_data_ptr = bld->thread_data_ptr;
2517    params.coords = coords;
2518    params.offsets = offsets;
2519    params.derivs = NULL;
2520    params.lod = explicit_lod;
2521    params.texel = texel;
2522 
2523    bld->sampler->emit_tex_sample(bld->sampler,
2524                                  bld->bld_base.base.gallivm,
2525                                  &params);
2526 
2527    if (is_samplei &&
2528        (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2529         inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2530         inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2531         inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2532       unsigned char swizzles[4];
2533       swizzles[0] = inst->Src[1].Register.SwizzleX;
2534       swizzles[1] = inst->Src[1].Register.SwizzleY;
2535       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2536       swizzles[3] = inst->Src[1].Register.SwizzleW;
2537 
2538       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2539    }
2540 }
2541 
2542 static void
emit_size_query(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * sizes_out,boolean is_sviewinfo)2543 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2544                  const struct tgsi_full_instruction *inst,
2545                  LLVMValueRef *sizes_out,
2546                  boolean is_sviewinfo)
2547 {
2548    LLVMValueRef explicit_lod;
2549    enum lp_sampler_lod_property lod_property;
2550    unsigned has_lod;
2551    unsigned i;
2552    unsigned unit = inst->Src[1].Register.Index;
2553    unsigned target, pipe_target;
2554    struct lp_sampler_size_query_params params;
2555 
2556    if (is_sviewinfo) {
2557       target = bld->sv[unit].Resource;
2558    }
2559    else {
2560       target = inst->Texture.Texture;
2561    }
2562    switch (target) {
2563    case TGSI_TEXTURE_BUFFER:
2564    case TGSI_TEXTURE_RECT:
2565    case TGSI_TEXTURE_SHADOWRECT:
2566       has_lod = 0;
2567       break;
2568    default:
2569       has_lod = 1;
2570       break;
2571    }
2572 
2573    if (!bld->sampler) {
2574       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2575       for (i = 0; i < 4; i++)
2576          sizes_out[i] = bld->bld_base.int_bld.undef;
2577       return;
2578    }
2579 
2580    if (has_lod) {
2581       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2582       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2583    }
2584    else {
2585       explicit_lod = NULL;
2586       lod_property = LP_SAMPLER_LOD_SCALAR;
2587    }
2588 
2589 
2590    pipe_target = tgsi_to_pipe_tex_target(target);
2591 
2592    params.int_type = bld->bld_base.int_bld.type;
2593    params.texture_unit = unit;
2594    params.target = pipe_target;
2595    params.context_ptr = bld->context_ptr;
2596    params.is_sviewinfo = TRUE;
2597    params.lod_property = lod_property;
2598    params.explicit_lod = explicit_lod;
2599    params.sizes_out = sizes_out;
2600 
2601    bld->sampler->emit_size_query(bld->sampler,
2602                                  bld->bld_base.base.gallivm,
2603                                  &params);
2604 }
2605 
2606 static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context * bld,int pc)2607 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2608                    int pc)
2609 {
2610    unsigned i;
2611 
2612    for (i = 0; i < 5; i++) {
2613       unsigned opcode;
2614 
2615       if (pc + i >= bld->bld_base.info->num_instructions)
2616          return TRUE;
2617 
2618       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2619 
2620       if (opcode == TGSI_OPCODE_END)
2621          return TRUE;
2622 
2623       if (opcode == TGSI_OPCODE_TEX ||
2624          opcode == TGSI_OPCODE_TXP ||
2625          opcode == TGSI_OPCODE_TXD ||
2626          opcode == TGSI_OPCODE_TXB ||
2627          opcode == TGSI_OPCODE_TXL ||
2628          opcode == TGSI_OPCODE_TXF ||
2629          opcode == TGSI_OPCODE_TXQ ||
2630          opcode == TGSI_OPCODE_TEX2 ||
2631          opcode == TGSI_OPCODE_TXB2 ||
2632          opcode == TGSI_OPCODE_TXL2 ||
2633          opcode == TGSI_OPCODE_SAMPLE ||
2634          opcode == TGSI_OPCODE_SAMPLE_B ||
2635          opcode == TGSI_OPCODE_SAMPLE_C ||
2636          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2637          opcode == TGSI_OPCODE_SAMPLE_D ||
2638          opcode == TGSI_OPCODE_SAMPLE_I ||
2639          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2640          opcode == TGSI_OPCODE_SAMPLE_L ||
2641          opcode == TGSI_OPCODE_SVIEWINFO ||
2642          opcode == TGSI_OPCODE_CAL ||
2643          opcode == TGSI_OPCODE_IF ||
2644          opcode == TGSI_OPCODE_UIF ||
2645          opcode == TGSI_OPCODE_BGNLOOP ||
2646          opcode == TGSI_OPCODE_SWITCH)
2647          return FALSE;
2648    }
2649 
2650    return TRUE;
2651 }
2652 
2653 
2654 
2655 /**
2656  * Kill fragment if any of the src register values are negative.
2657  */
2658 static void
emit_kill_if(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,int pc)2659 emit_kill_if(
2660    struct lp_build_tgsi_soa_context *bld,
2661    const struct tgsi_full_instruction *inst,
2662    int pc)
2663 {
2664    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2665    const struct tgsi_full_src_register *reg = &inst->Src[0];
2666    LLVMValueRef terms[TGSI_NUM_CHANNELS];
2667    LLVMValueRef mask;
2668    unsigned chan_index;
2669 
2670    memset(&terms, 0, sizeof terms);
2671 
2672    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2673       unsigned swizzle;
2674 
2675       /* Unswizzle channel */
2676       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2677 
2678       /* Check if the component has not been already tested. */
2679       assert(swizzle < TGSI_NUM_CHANNELS);
2680       if( !terms[swizzle] )
2681          /* TODO: change the comparison operator instead of setting the sign */
2682          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2683    }
2684 
2685    mask = NULL;
2686    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2687       if(terms[chan_index]) {
2688          LLVMValueRef chan_mask;
2689 
2690          /*
2691           * If term < 0 then mask = 0 else mask = ~0.
2692           */
2693          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2694 
2695          if(mask)
2696             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2697          else
2698             mask = chan_mask;
2699       }
2700    }
2701 
2702    if (bld->exec_mask.has_mask) {
2703       LLVMValueRef invmask;
2704       invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2705       mask = LLVMBuildOr(builder, mask, invmask, "");
2706    }
2707 
2708    lp_build_mask_update(bld->mask, mask);
2709    if (!near_end_of_shader(bld, pc))
2710       lp_build_mask_check(bld->mask);
2711 }
2712 
2713 
2714 /**
2715  * Unconditional fragment kill.
2716  * The only predication is the execution mask which will apply if
2717  * we're inside a loop or conditional.
2718  */
2719 static void
emit_kill(struct lp_build_tgsi_soa_context * bld,int pc)2720 emit_kill(struct lp_build_tgsi_soa_context *bld,
2721           int pc)
2722 {
2723    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2724    LLVMValueRef mask;
2725 
2726    /* For those channels which are "alive", disable fragment shader
2727     * execution.
2728     */
2729    if (bld->exec_mask.has_mask) {
2730       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2731    }
2732    else {
2733       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2734       mask = zero;
2735    }
2736 
2737    lp_build_mask_update(bld->mask, mask);
2738 
2739    if (!near_end_of_shader(bld, pc))
2740       lp_build_mask_check(bld->mask);
2741 }
2742 
2743 
2744 /**
2745  * Emit code which will dump the value of all the temporary registers
2746  * to stdout.
2747  */
2748 static void
emit_dump_file(struct lp_build_tgsi_soa_context * bld,unsigned file)2749 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2750                unsigned file)
2751 {
2752    const struct tgsi_shader_info *info = bld->bld_base.info;
2753    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2754    LLVMBuilderRef builder = gallivm->builder;
2755    LLVMValueRef reg_ptr;
2756    int index;
2757    int max_index = info->file_max[file];
2758 
2759    /*
2760     * Some register files, particularly constants, can be very large,
2761     * and dumping everything could make this unusably slow.
2762     */
2763    max_index = MIN2(max_index, 32);
2764 
2765    for (index = 0; index <= max_index; index++) {
2766       LLVMValueRef res;
2767       unsigned mask;
2768       int chan;
2769 
2770       if (index < 8 * sizeof(unsigned) &&
2771           (info->file_mask[file] & (1u << index)) == 0)  {
2772          /* This was not declared.*/
2773          continue;
2774       }
2775 
2776       if (file == TGSI_FILE_INPUT) {
2777          mask = info->input_usage_mask[index];
2778       } else {
2779          mask = TGSI_WRITEMASK_XYZW;
2780       }
2781 
2782       for (chan = 0; chan < 4; chan++) {
2783          if ((mask & (1 << chan)) == 0) {
2784             /* This channel is not used.*/
2785             continue;
2786          }
2787 
2788          if (file == TGSI_FILE_CONSTANT) {
2789             struct tgsi_full_src_register reg;
2790             memset(&reg, 0, sizeof reg);
2791             reg.Register.File = file;
2792             reg.Register.Index = index;
2793             reg.Register.SwizzleX = 0;
2794             reg.Register.SwizzleY = 1;
2795             reg.Register.SwizzleZ = 2;
2796             reg.Register.SwizzleW = 3;
2797 
2798             res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2799             if (!res) {
2800                continue;
2801             }
2802          } else if (file == TGSI_FILE_INPUT) {
2803             res = bld->inputs[index][chan];
2804             if (!res) {
2805                continue;
2806             }
2807          } else if (file == TGSI_FILE_TEMPORARY) {
2808             reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2809             assert(reg_ptr);
2810             res = LLVMBuildLoad(builder, reg_ptr, "");
2811          } else if (file == TGSI_FILE_OUTPUT) {
2812             reg_ptr = lp_get_output_ptr(bld, index, chan);
2813             assert(reg_ptr);
2814             res = LLVMBuildLoad(builder, reg_ptr, "");
2815          } else {
2816             assert(0);
2817             continue;
2818          }
2819 
2820          emit_dump_reg(gallivm, file, index, chan, res);
2821       }
2822    }
2823 }
2824 
2825 
2826 
2827 void
lp_emit_declaration_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_declaration * decl)2828 lp_emit_declaration_soa(
2829    struct lp_build_tgsi_context *bld_base,
2830    const struct tgsi_full_declaration *decl)
2831 {
2832    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2833    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2834    LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2835    const unsigned first = decl->Range.First;
2836    const unsigned last = decl->Range.Last;
2837    unsigned idx, i;
2838 
2839    assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2840 
2841    switch (decl->Declaration.File) {
2842    case TGSI_FILE_TEMPORARY:
2843       if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2844          assert(last < LP_MAX_INLINED_TEMPS);
2845          for (idx = first; idx <= last; ++idx) {
2846             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2847                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2848          }
2849       }
2850       break;
2851 
2852    case TGSI_FILE_OUTPUT:
2853       if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2854          for (idx = first; idx <= last; ++idx) {
2855             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2856                bld->outputs[idx][i] = lp_build_alloca(gallivm,
2857                                                       vec_type, "output");
2858          }
2859       }
2860       break;
2861 
2862    case TGSI_FILE_ADDRESS:
2863       /* ADDR registers are only allocated with an integer LLVM IR type,
2864        * as they are guaranteed to always have integers.
2865        * XXX: Not sure if this exception is worthwhile (or the whole idea of
2866        * an ADDR register for that matter).
2867        */
2868       assert(last < LP_MAX_TGSI_ADDRS);
2869       for (idx = first; idx <= last; ++idx) {
2870          assert(idx < LP_MAX_TGSI_ADDRS);
2871          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2872             bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2873       }
2874       break;
2875 
2876    case TGSI_FILE_SAMPLER_VIEW:
2877       /*
2878        * The target stored here MUST match whatever there actually
2879        * is in the set sampler views (what about return type?).
2880        */
2881       assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2882       for (idx = first; idx <= last; ++idx) {
2883          bld->sv[idx] = decl->SamplerView;
2884       }
2885       break;
2886 
2887    case TGSI_FILE_CONSTANT:
2888    {
2889       /*
2890        * We could trivially fetch the per-buffer pointer when fetching the
2891        * constant, relying on llvm to figure out it's always the same pointer
2892        * anyway. However, doing so results in a huge (more than factor of 10)
2893        * slowdown in llvm compilation times for some (but not all) shaders
2894        * (more specifically, the IR optimization spends way more time in
2895        * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2896        */
2897       unsigned idx2D = decl->Dim.Index2D;
2898       LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2899       assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2900       bld->consts[idx2D] =
2901          lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2902       bld->consts_sizes[idx2D] =
2903          lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2904    }
2905       break;
2906 
2907    default:
2908       /* don't need to declare other vars */
2909       break;
2910    }
2911 }
2912 
2913 
lp_emit_immediate_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_immediate * imm)2914 void lp_emit_immediate_soa(
2915    struct lp_build_tgsi_context *bld_base,
2916    const struct tgsi_full_immediate *imm)
2917 {
2918    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2919    struct gallivm_state * gallivm = bld_base->base.gallivm;
2920    LLVMValueRef imms[4];
2921    unsigned i;
2922    const uint size = imm->Immediate.NrTokens - 1;
2923    assert(size <= 4);
2924    switch (imm->Immediate.DataType) {
2925    case TGSI_IMM_FLOAT32:
2926       for( i = 0; i < size; ++i )
2927          imms[i] =
2928                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2929 
2930       break;
2931    case TGSI_IMM_FLOAT64:
2932    case TGSI_IMM_UINT64:
2933    case TGSI_IMM_INT64:
2934    case TGSI_IMM_UINT32:
2935       for( i = 0; i < size; ++i ) {
2936          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2937          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2938       }
2939 
2940       break;
2941    case TGSI_IMM_INT32:
2942       for( i = 0; i < size; ++i ) {
2943          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2944          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2945       }
2946 
2947       break;
2948    }
2949    for( i = size; i < 4; ++i )
2950       imms[i] = bld_base->base.undef;
2951 
2952    if (bld->use_immediates_array) {
2953       unsigned index = bld->num_immediates;
2954       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2955       LLVMBuilderRef builder = gallivm->builder;
2956 
2957       assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
2958       for (i = 0; i < 4; ++i ) {
2959          LLVMValueRef lindex = lp_build_const_int32(
2960                   bld->bld_base.base.gallivm, index * 4 + i);
2961          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2962                                              bld->imms_array, &lindex, 1, "");
2963          LLVMBuildStore(builder, imms[i], imm_ptr);
2964       }
2965    } else {
2966       /* simply copy the immediate values into the next immediates[] slot */
2967       unsigned i;
2968       assert(imm->Immediate.NrTokens - 1 <= 4);
2969       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
2970 
2971       for(i = 0; i < 4; ++i )
2972          bld->immediates[bld->num_immediates][i] = imms[i];
2973 
2974       if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
2975          unsigned index = bld->num_immediates;
2976          struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2977          LLVMBuilderRef builder = gallivm->builder;
2978          for (i = 0; i < 4; ++i ) {
2979             LLVMValueRef lindex = lp_build_const_int32(
2980                      bld->bld_base.base.gallivm, index * 4 + i);
2981             LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2982                                                 bld->imms_array, &lindex, 1, "");
2983             LLVMBuildStore(builder,
2984                            bld->immediates[index][i],
2985                            imm_ptr);
2986          }
2987       }
2988    }
2989 
2990    bld->num_immediates++;
2991 }
2992 
2993 static void
ddx_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)2994 ddx_emit(
2995    const struct lp_build_tgsi_action * action,
2996    struct lp_build_tgsi_context * bld_base,
2997    struct lp_build_emit_data * emit_data)
2998 {
2999    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3000 
3001    emit_fetch_deriv(bld, emit_data->args[0], NULL,
3002                     &emit_data->output[emit_data->chan], NULL);
3003 }
3004 
3005 static void
ddy_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3006 ddy_emit(
3007    const struct lp_build_tgsi_action * action,
3008    struct lp_build_tgsi_context * bld_base,
3009    struct lp_build_emit_data * emit_data)
3010 {
3011    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3012 
3013    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3014                     &emit_data->output[emit_data->chan]);
3015 }
3016 
3017 static void
kill_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3018 kill_emit(
3019    const struct lp_build_tgsi_action * action,
3020    struct lp_build_tgsi_context * bld_base,
3021    struct lp_build_emit_data * emit_data)
3022 {
3023    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3024 
3025    emit_kill(bld, bld_base->pc - 1);
3026 }
3027 
3028 static void
kill_if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3029 kill_if_emit(
3030    const struct lp_build_tgsi_action * action,
3031    struct lp_build_tgsi_context * bld_base,
3032    struct lp_build_emit_data * emit_data)
3033 {
3034    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3035 
3036    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3037 }
3038 
3039 static void
tex_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3040 tex_emit(
3041    const struct lp_build_tgsi_action * action,
3042    struct lp_build_tgsi_context * bld_base,
3043    struct lp_build_emit_data * emit_data)
3044 {
3045    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3046 
3047    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3048             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3049 }
3050 
3051 static void
tex2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3052 tex2_emit(
3053    const struct lp_build_tgsi_action * action,
3054    struct lp_build_tgsi_context * bld_base,
3055    struct lp_build_emit_data * emit_data)
3056 {
3057    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3058 
3059    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3060             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3061 }
3062 
3063 static void
txb_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3064 txb_emit(
3065    const struct lp_build_tgsi_action * action,
3066    struct lp_build_tgsi_context * bld_base,
3067    struct lp_build_emit_data * emit_data)
3068 {
3069    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3070 
3071    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3072             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3073 }
3074 
3075 static void
txb2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3076 txb2_emit(
3077    const struct lp_build_tgsi_action * action,
3078    struct lp_build_tgsi_context * bld_base,
3079    struct lp_build_emit_data * emit_data)
3080 {
3081    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3082 
3083    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3084             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3085 }
3086 
3087 static void
txd_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3088 txd_emit(
3089    const struct lp_build_tgsi_action * action,
3090    struct lp_build_tgsi_context * bld_base,
3091    struct lp_build_emit_data * emit_data)
3092 {
3093    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3094 
3095    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3096             emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3097 }
3098 
3099 static void
txl_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3100 txl_emit(
3101    const struct lp_build_tgsi_action * action,
3102    struct lp_build_tgsi_context * bld_base,
3103    struct lp_build_emit_data * emit_data)
3104 {
3105    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3106 
3107    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3108             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3109 }
3110 
3111 static void
txl2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3112 txl2_emit(
3113    const struct lp_build_tgsi_action * action,
3114    struct lp_build_tgsi_context * bld_base,
3115    struct lp_build_emit_data * emit_data)
3116 {
3117    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3118 
3119    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3120             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3121 }
3122 
3123 static void
txp_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3124 txp_emit(
3125    const struct lp_build_tgsi_action * action,
3126    struct lp_build_tgsi_context * bld_base,
3127    struct lp_build_emit_data * emit_data)
3128 {
3129    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3130 
3131    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3132             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3133 }
3134 
3135 static void
tg4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3136 tg4_emit(
3137    const struct lp_build_tgsi_action * action,
3138    struct lp_build_tgsi_context * bld_base,
3139    struct lp_build_emit_data * emit_data)
3140 {
3141    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3142 
3143    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3144             emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3145 }
3146 
3147 static void
lodq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3148 lodq_emit(
3149    const struct lp_build_tgsi_action * action,
3150    struct lp_build_tgsi_context * bld_base,
3151    struct lp_build_emit_data * emit_data)
3152 {
3153    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3154 
3155    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3156             emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3157 }
3158 
3159 static void
txq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3160 txq_emit(
3161    const struct lp_build_tgsi_action * action,
3162    struct lp_build_tgsi_context * bld_base,
3163    struct lp_build_emit_data * emit_data)
3164 {
3165    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3166 
3167    emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3168 }
3169 
3170 static void
txf_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3171 txf_emit(
3172    const struct lp_build_tgsi_action * action,
3173    struct lp_build_tgsi_context * bld_base,
3174    struct lp_build_emit_data * emit_data)
3175 {
3176    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3177 
3178    emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3179 }
3180 
3181 static void
sample_i_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3182 sample_i_emit(
3183    const struct lp_build_tgsi_action * action,
3184    struct lp_build_tgsi_context * bld_base,
3185    struct lp_build_emit_data * emit_data)
3186 {
3187    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3188 
3189    emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3190 }
3191 
3192 static void
sample_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3193 sample_emit(
3194    const struct lp_build_tgsi_action * action,
3195    struct lp_build_tgsi_context * bld_base,
3196    struct lp_build_emit_data * emit_data)
3197 {
3198    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3199 
3200    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3201                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3202 }
3203 
3204 static void
sample_b_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3205 sample_b_emit(
3206    const struct lp_build_tgsi_action * action,
3207    struct lp_build_tgsi_context * bld_base,
3208    struct lp_build_emit_data * emit_data)
3209 {
3210    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3211 
3212    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3213                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3214 }
3215 
3216 static void
sample_c_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3217 sample_c_emit(
3218    const struct lp_build_tgsi_action * action,
3219    struct lp_build_tgsi_context * bld_base,
3220    struct lp_build_emit_data * emit_data)
3221 {
3222    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3223 
3224    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3225                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3226 }
3227 
3228 static void
sample_c_lz_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3229 sample_c_lz_emit(
3230    const struct lp_build_tgsi_action * action,
3231    struct lp_build_tgsi_context * bld_base,
3232    struct lp_build_emit_data * emit_data)
3233 {
3234    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3235 
3236    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3237                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3238 }
3239 
3240 static void
sample_d_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3241 sample_d_emit(
3242    const struct lp_build_tgsi_action * action,
3243    struct lp_build_tgsi_context * bld_base,
3244    struct lp_build_emit_data * emit_data)
3245 {
3246    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3247 
3248    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3249                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3250 }
3251 
3252 static void
sample_l_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3253 sample_l_emit(
3254    const struct lp_build_tgsi_action * action,
3255    struct lp_build_tgsi_context * bld_base,
3256    struct lp_build_emit_data * emit_data)
3257 {
3258    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3259 
3260    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3261                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3262 }
3263 
3264 static void
gather4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3265 gather4_emit(
3266    const struct lp_build_tgsi_action * action,
3267    struct lp_build_tgsi_context * bld_base,
3268    struct lp_build_emit_data * emit_data)
3269 {
3270    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3271 
3272    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3273                FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3274 }
3275 
3276 static void
sviewinfo_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3277 sviewinfo_emit(
3278    const struct lp_build_tgsi_action * action,
3279    struct lp_build_tgsi_context * bld_base,
3280    struct lp_build_emit_data * emit_data)
3281 {
3282    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3283 
3284    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3285 }
3286 
3287 static void
lod_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3288 lod_emit(
3289    const struct lp_build_tgsi_action * action,
3290    struct lp_build_tgsi_context * bld_base,
3291    struct lp_build_emit_data * emit_data)
3292 {
3293    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3294 
3295    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3296                FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3297 }
3298 
3299 static LLVMValueRef
mask_vec(struct lp_build_tgsi_context * bld_base)3300 mask_vec(struct lp_build_tgsi_context *bld_base)
3301 {
3302    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3303    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3304    struct lp_exec_mask *exec_mask = &bld->exec_mask;
3305 
3306    if (!exec_mask->has_mask) {
3307       return lp_build_mask_value(bld->mask);
3308    }
3309    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3310                        exec_mask->exec_mask, "");
3311 }
3312 
3313 static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3314 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3315                           LLVMValueRef ptr,
3316                           LLVMValueRef mask)
3317 {
3318    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3319    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3320 
3321    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3322 
3323    LLVMBuildStore(builder, current_vec, ptr);
3324 }
3325 
3326 static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3327 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3328                              LLVMValueRef ptr,
3329                              LLVMValueRef mask)
3330 {
3331    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3332    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3333 
3334    current_vec = lp_build_select(&bld_base->uint_bld,
3335                                  mask,
3336                                  bld_base->uint_bld.zero,
3337                                  current_vec);
3338 
3339    LLVMBuildStore(builder, current_vec, ptr);
3340 }
3341 
3342 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)3343 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3344                                   LLVMValueRef current_mask_vec,
3345                                   LLVMValueRef total_emitted_vertices_vec)
3346 {
3347    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3348    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3349    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3350                                         total_emitted_vertices_vec,
3351                                         bld->max_output_vertices_vec);
3352 
3353    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3354 }
3355 
3356 static void
emit_vertex(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3357 emit_vertex(
3358    const struct lp_build_tgsi_action * action,
3359    struct lp_build_tgsi_context * bld_base,
3360    struct lp_build_emit_data * emit_data)
3361 {
3362    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3363    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3364 
3365    if (bld->gs_iface->emit_vertex) {
3366       LLVMValueRef mask = mask_vec(bld_base);
3367       LLVMValueRef total_emitted_vertices_vec =
3368          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3369       mask = clamp_mask_to_max_output_vertices(bld, mask,
3370                                                total_emitted_vertices_vec);
3371       gather_outputs(bld);
3372       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
3373                                  bld->outputs,
3374                                  total_emitted_vertices_vec);
3375       increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3376                                 mask);
3377       increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3378                                 mask);
3379 #if DUMP_GS_EMITS
3380       lp_build_print_value(bld->bld_base.base.gallivm,
3381                            " +++ emit vertex masked ones = ",
3382                            mask);
3383       lp_build_print_value(bld->bld_base.base.gallivm,
3384                            " +++ emit vertex emitted = ",
3385                            total_emitted_vertices_vec);
3386 #endif
3387    }
3388 }
3389 
3390 
3391 static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,LLVMValueRef mask)3392 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3393                      LLVMValueRef mask)
3394 {
3395    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3396    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3397 
3398    if (bld->gs_iface->end_primitive) {
3399       struct lp_build_context *uint_bld = &bld_base->uint_bld;
3400       LLVMValueRef emitted_vertices_vec =
3401          LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3402       LLVMValueRef emitted_prims_vec =
3403          LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3404 
3405       LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3406                                                emitted_vertices_vec,
3407                                                uint_bld->zero);
3408       /* We need to combine the current execution mask with the mask
3409          telling us which, if any, execution slots actually have
3410          unemitted primitives, this way we make sure that end_primitives
3411          executes only on the paths that have unflushed vertices */
3412       mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3413 
3414       bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
3415                                    emitted_vertices_vec,
3416                                    emitted_prims_vec);
3417 
3418 #if DUMP_GS_EMITS
3419       lp_build_print_value(bld->bld_base.base.gallivm,
3420                            " +++ end prim masked ones = ",
3421                            mask);
3422       lp_build_print_value(bld->bld_base.base.gallivm,
3423                            " +++ end prim emitted verts1 = ",
3424                            emitted_vertices_vec);
3425       lp_build_print_value(bld->bld_base.base.gallivm,
3426                            " +++ end prim emitted prims1 = ",
3427                            LLVMBuildLoad(builder,
3428                                          bld->emitted_prims_vec_ptr, ""));
3429 #endif
3430       increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3431                                 mask);
3432       clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3433                                    mask);
3434 #if DUMP_GS_EMITS
3435       lp_build_print_value(bld->bld_base.base.gallivm,
3436                            " +++ end prim emitted verts2 = ",
3437                            LLVMBuildLoad(builder,
3438                                          bld->emitted_vertices_vec_ptr, ""));
3439 #endif
3440    }
3441 
3442 }
3443 
3444 static void
end_primitive(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3445 end_primitive(
3446    const struct lp_build_tgsi_action * action,
3447    struct lp_build_tgsi_context * bld_base,
3448    struct lp_build_emit_data * emit_data)
3449 {
3450    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3451 
3452    if (bld->gs_iface->end_primitive) {
3453       LLVMValueRef mask = mask_vec(bld_base);
3454       end_primitive_masked(bld_base, mask);
3455    }
3456 }
3457 
3458 static void
cal_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3459 cal_emit(
3460    const struct lp_build_tgsi_action * action,
3461    struct lp_build_tgsi_context * bld_base,
3462    struct lp_build_emit_data * emit_data)
3463 {
3464    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3465 
3466    lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3467                      &bld_base->pc);
3468 }
3469 
3470 static void
ret_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3471 ret_emit(
3472    const struct lp_build_tgsi_action * action,
3473    struct lp_build_tgsi_context * bld_base,
3474    struct lp_build_emit_data * emit_data)
3475 {
3476    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3477 
3478    lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3479 }
3480 
3481 static void
brk_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3482 brk_emit(
3483    const struct lp_build_tgsi_action * action,
3484    struct lp_build_tgsi_context * bld_base,
3485    struct lp_build_emit_data * emit_data)
3486 {
3487    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3488 
3489    lp_exec_break(&bld->exec_mask, bld_base);
3490 }
3491 
3492 static void
if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3493 if_emit(
3494    const struct lp_build_tgsi_action * action,
3495    struct lp_build_tgsi_context * bld_base,
3496    struct lp_build_emit_data * emit_data)
3497 {
3498    LLVMValueRef tmp;
3499    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3500 
3501    tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3502                       emit_data->args[0], bld->bld_base.base.zero);
3503    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3504 }
3505 
3506 static void
uif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3507 uif_emit(
3508    const struct lp_build_tgsi_action * action,
3509    struct lp_build_tgsi_context * bld_base,
3510    struct lp_build_emit_data * emit_data)
3511 {
3512    LLVMValueRef tmp;
3513    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3514    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3515 
3516    tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3517                       emit_data->args[0], uint_bld->zero);
3518    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3519 }
3520 
3521 static void
case_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3522 case_emit(
3523    const struct lp_build_tgsi_action * action,
3524    struct lp_build_tgsi_context * bld_base,
3525    struct lp_build_emit_data * emit_data)
3526 {
3527    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3528 
3529    lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3530 }
3531 
3532 static void
default_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3533 default_emit(
3534    const struct lp_build_tgsi_action * action,
3535    struct lp_build_tgsi_context * bld_base,
3536    struct lp_build_emit_data * emit_data)
3537 {
3538    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3539 
3540    lp_exec_default(&bld->exec_mask, bld_base);
3541 }
3542 
3543 static void
switch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3544 switch_emit(
3545    const struct lp_build_tgsi_action * action,
3546    struct lp_build_tgsi_context * bld_base,
3547    struct lp_build_emit_data * emit_data)
3548 {
3549    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3550 
3551    lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3552 }
3553 
3554 static void
endswitch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3555 endswitch_emit(
3556    const struct lp_build_tgsi_action * action,
3557    struct lp_build_tgsi_context * bld_base,
3558    struct lp_build_emit_data * emit_data)
3559 {
3560    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3561 
3562    lp_exec_endswitch(&bld->exec_mask, bld_base);
3563 }
3564 
3565 static void
bgnloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3566 bgnloop_emit(
3567    const struct lp_build_tgsi_action * action,
3568    struct lp_build_tgsi_context * bld_base,
3569    struct lp_build_emit_data * emit_data)
3570 {
3571    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3572 
3573    lp_exec_bgnloop(&bld->exec_mask);
3574 }
3575 
3576 static void
bgnsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3577 bgnsub_emit(
3578    const struct lp_build_tgsi_action * action,
3579    struct lp_build_tgsi_context * bld_base,
3580    struct lp_build_emit_data * emit_data)
3581 {
3582    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3583 
3584    lp_exec_mask_bgnsub(&bld->exec_mask);
3585 }
3586 
3587 static void
else_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3588 else_emit(
3589    const struct lp_build_tgsi_action * action,
3590    struct lp_build_tgsi_context * bld_base,
3591    struct lp_build_emit_data * emit_data)
3592 {
3593    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3594 
3595    lp_exec_mask_cond_invert(&bld->exec_mask);
3596 }
3597 
3598 static void
endif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3599 endif_emit(
3600    const struct lp_build_tgsi_action * action,
3601    struct lp_build_tgsi_context * bld_base,
3602    struct lp_build_emit_data * emit_data)
3603 {
3604    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3605 
3606    lp_exec_mask_cond_pop(&bld->exec_mask);
3607 }
3608 
3609 static void
endloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3610 endloop_emit(
3611    const struct lp_build_tgsi_action * action,
3612    struct lp_build_tgsi_context * bld_base,
3613    struct lp_build_emit_data * emit_data)
3614 {
3615    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3616 
3617    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3618 }
3619 
3620 static void
endsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3621 endsub_emit(
3622    const struct lp_build_tgsi_action * action,
3623    struct lp_build_tgsi_context * bld_base,
3624    struct lp_build_emit_data * emit_data)
3625 {
3626    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3627 
3628    lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3629 }
3630 
3631 static void
cont_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3632 cont_emit(
3633    const struct lp_build_tgsi_action * action,
3634    struct lp_build_tgsi_context * bld_base,
3635    struct lp_build_emit_data * emit_data)
3636 {
3637    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3638 
3639    lp_exec_continue(&bld->exec_mask);
3640 }
3641 
emit_prologue(struct lp_build_tgsi_context * bld_base)3642 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
3643 {
3644    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3645    struct gallivm_state * gallivm = bld_base->base.gallivm;
3646 
3647    if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
3648       LLVMValueRef array_size =
3649          lp_build_const_int32(gallivm,
3650                          bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
3651       bld->temps_array = lp_build_array_alloca(gallivm,
3652                                               bld_base->base.vec_type, array_size,
3653                                               "temp_array");
3654    }
3655 
3656    if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3657       LLVMValueRef array_size =
3658          lp_build_const_int32(gallivm,
3659                             bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
3660       bld->outputs_array = lp_build_array_alloca(gallivm,
3661                                                 bld_base->base.vec_type, array_size,
3662                                                 "output_array");
3663    }
3664 
3665    if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3666       LLVMValueRef array_size =
3667          lp_build_const_int32(gallivm,
3668                          bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
3669       bld->imms_array = lp_build_array_alloca(gallivm,
3670                                               bld_base->base.vec_type, array_size,
3671                                               "imms_array");
3672    }
3673 
3674    /* If we have indirect addressing in inputs we need to copy them into
3675     * our alloca array to be able to iterate over them */
3676    if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
3677       unsigned index, chan;
3678       LLVMTypeRef vec_type = bld_base->base.vec_type;
3679       LLVMValueRef array_size = lp_build_const_int32(gallivm,
3680             bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
3681       bld->inputs_array = lp_build_array_alloca(gallivm,
3682                                                vec_type, array_size,
3683                                                "input_array");
3684 
3685       assert(bld_base->info->num_inputs
3686                         <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
3687 
3688       for (index = 0; index < bld_base->info->num_inputs; ++index) {
3689          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
3690             LLVMValueRef lindex =
3691                lp_build_const_int32(gallivm, index * 4 + chan);
3692             LLVMValueRef input_ptr =
3693                LLVMBuildGEP(gallivm->builder, bld->inputs_array,
3694                             &lindex, 1, "");
3695             LLVMValueRef value = bld->inputs[index][chan];
3696             if (value)
3697                LLVMBuildStore(gallivm->builder, value, input_ptr);
3698          }
3699       }
3700    }
3701 
3702    if (bld->gs_iface) {
3703       struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
3704       bld->emitted_prims_vec_ptr =
3705          lp_build_alloca(gallivm,
3706                          uint_bld->vec_type,
3707                          "emitted_prims_ptr");
3708       bld->emitted_vertices_vec_ptr =
3709          lp_build_alloca(gallivm,
3710                          uint_bld->vec_type,
3711                          "emitted_vertices_ptr");
3712       bld->total_emitted_vertices_vec_ptr =
3713          lp_build_alloca(gallivm,
3714                          uint_bld->vec_type,
3715                          "total_emitted_vertices_ptr");
3716 
3717       LLVMBuildStore(gallivm->builder, uint_bld->zero,
3718                      bld->emitted_prims_vec_ptr);
3719       LLVMBuildStore(gallivm->builder, uint_bld->zero,
3720                      bld->emitted_vertices_vec_ptr);
3721       LLVMBuildStore(gallivm->builder, uint_bld->zero,
3722                      bld->total_emitted_vertices_vec_ptr);
3723    }
3724 
3725    if (DEBUG_EXECUTION) {
3726       lp_build_printf(gallivm, "\n");
3727       emit_dump_file(bld, TGSI_FILE_CONSTANT);
3728       if (!bld->gs_iface)
3729          emit_dump_file(bld, TGSI_FILE_INPUT);
3730    }
3731 }
3732 
emit_epilogue(struct lp_build_tgsi_context * bld_base)3733 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
3734 {
3735    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3736    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3737 
3738    if (DEBUG_EXECUTION) {
3739       /* for debugging */
3740       if (0) {
3741          emit_dump_file(bld, TGSI_FILE_TEMPORARY);
3742       }
3743       emit_dump_file(bld, TGSI_FILE_OUTPUT);
3744       lp_build_printf(bld_base->base.gallivm, "\n");
3745    }
3746 
3747    /* If we have indirect addressing in outputs we need to copy our alloca array
3748     * to the outputs slots specified by the caller */
3749    if (bld->gs_iface) {
3750       LLVMValueRef total_emitted_vertices_vec;
3751       LLVMValueRef emitted_prims_vec;
3752       /* implicit end_primitives, needed in case there are any unflushed
3753          vertices in the cache. Note must not call end_primitive here
3754          since the exec_mask is not valid at this point. */
3755       end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
3756 
3757       total_emitted_vertices_vec =
3758          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3759       emitted_prims_vec =
3760          LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3761 
3762       bld->gs_iface->gs_epilogue(bld->gs_iface,
3763                                  &bld->bld_base,
3764                                  total_emitted_vertices_vec,
3765                                  emitted_prims_vec);
3766    } else {
3767       gather_outputs(bld);
3768    }
3769 }
3770 
3771 void
lp_build_tgsi_soa(struct gallivm_state * gallivm,const struct tgsi_token * tokens,struct lp_type type,struct lp_build_mask_context * mask,LLVMValueRef consts_ptr,LLVMValueRef const_sizes_ptr,const struct lp_bld_tgsi_system_values * system_values,const LLVMValueRef (* inputs)[TGSI_NUM_CHANNELS],LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef context_ptr,LLVMValueRef thread_data_ptr,struct lp_build_sampler_soa * sampler,const struct tgsi_shader_info * info,const struct lp_build_tgsi_gs_iface * gs_iface)3772 lp_build_tgsi_soa(struct gallivm_state *gallivm,
3773                   const struct tgsi_token *tokens,
3774                   struct lp_type type,
3775                   struct lp_build_mask_context *mask,
3776                   LLVMValueRef consts_ptr,
3777                   LLVMValueRef const_sizes_ptr,
3778                   const struct lp_bld_tgsi_system_values *system_values,
3779                   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
3780                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
3781                   LLVMValueRef context_ptr,
3782                   LLVMValueRef thread_data_ptr,
3783                   struct lp_build_sampler_soa *sampler,
3784                   const struct tgsi_shader_info *info,
3785                   const struct lp_build_tgsi_gs_iface *gs_iface)
3786 {
3787    struct lp_build_tgsi_soa_context bld;
3788 
3789    struct lp_type res_type;
3790 
3791    assert(type.length <= LP_MAX_VECTOR_LENGTH);
3792    memset(&res_type, 0, sizeof res_type);
3793    res_type.width = type.width;
3794    res_type.length = type.length;
3795    res_type.sign = 1;
3796 
3797    /* Setup build context */
3798    memset(&bld, 0, sizeof bld);
3799    lp_build_context_init(&bld.bld_base.base, gallivm, type);
3800    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3801    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3802    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3803    {
3804       struct lp_type dbl_type;
3805       dbl_type = type;
3806       dbl_type.width *= 2;
3807       lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
3808    }
3809    {
3810       struct lp_type uint64_type;
3811       uint64_type = lp_uint_type(type);
3812       uint64_type.width *= 2;
3813       lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
3814    }
3815    {
3816       struct lp_type int64_type;
3817       int64_type = lp_int_type(type);
3818       int64_type.width *= 2;
3819       lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
3820    }
3821    bld.mask = mask;
3822    bld.inputs = inputs;
3823    bld.outputs = outputs;
3824    bld.consts_ptr = consts_ptr;
3825    bld.const_sizes_ptr = const_sizes_ptr;
3826    bld.sampler = sampler;
3827    bld.bld_base.info = info;
3828    bld.indirect_files = info->indirect_files;
3829    bld.context_ptr = context_ptr;
3830    bld.thread_data_ptr = thread_data_ptr;
3831 
3832    /*
3833     * If the number of temporaries is rather large then we just
3834     * allocate them as an array right from the start and treat
3835     * like indirect temporaries.
3836     */
3837    if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
3838       bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
3839    }
3840    /*
3841     * For performance reason immediates are always backed in a static
3842     * array, but if their number is too great, we have to use just
3843     * a dynamically allocated array.
3844     */
3845    bld.use_immediates_array =
3846          (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
3847    if (bld.use_immediates_array) {
3848       bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
3849    }
3850 
3851 
3852    bld.bld_base.soa = TRUE;
3853    bld.bld_base.emit_debug = emit_debug;
3854    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
3855    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
3856    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
3857    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
3858    bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
3859    bld.bld_base.emit_store = emit_store;
3860 
3861    bld.bld_base.emit_declaration = lp_emit_declaration_soa;
3862    bld.bld_base.emit_immediate = lp_emit_immediate_soa;
3863 
3864    bld.bld_base.emit_prologue = emit_prologue;
3865    bld.bld_base.emit_epilogue = emit_epilogue;
3866 
3867    /* Set opcode actions */
3868    lp_set_default_actions_cpu(&bld.bld_base);
3869 
3870    bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
3871    bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
3872    bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
3873    bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
3874    bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
3875    bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
3876    bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
3877    bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
3878    bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
3879    bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
3880    bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
3881    bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
3882    bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
3883    bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
3884    bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
3885    bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
3886    bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
3887    bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
3888    bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
3889    bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
3890    bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
3891    bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
3892    bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
3893    bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
3894    bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
3895    bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
3896    bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
3897    bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
3898    bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
3899    bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
3900    bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
3901    bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
3902    /* DX10 sampling ops */
3903    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
3904    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
3905    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
3906    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
3907    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
3908    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
3909    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
3910    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
3911    bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
3912    bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
3913    bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
3914 
3915 
3916    if (gs_iface) {
3917       /* There's no specific default value for the maximum number of
3918        * output vertices because it should always be set. Apps using
3919        * ext_geometry_shader4 quite often forgot to set it, though, so
3920        * we use MAX_VERTEX_VARYING from that spec; we could debug_assert
3921        * when it's not set instead, but that's a lot uglier. */
3922       uint max_output_vertices;
3923 
3924       /* inputs are always indirect with gs */
3925       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
3926       bld.gs_iface = gs_iface;
3927       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
3928       bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
3929       bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
3930 
3931       max_output_vertices =
3932             info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
3933       if (!max_output_vertices)
3934          max_output_vertices = 32;
3935 
3936       bld.max_output_vertices_vec =
3937          lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
3938                                 max_output_vertices);
3939    }
3940 
3941    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3942 
3943    bld.system_values = *system_values;
3944 
3945    lp_build_tgsi_llvm(&bld.bld_base, tokens);
3946 
3947    if (0) {
3948       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
3949       LLVMValueRef function = LLVMGetBasicBlockParent(block);
3950       debug_printf("11111111111111111111111111111 \n");
3951       tgsi_dump(tokens, 0);
3952       lp_debug_dump_value(function);
3953       debug_printf("2222222222222222222222222222 \n");
3954    }
3955 
3956    if (0) {
3957       LLVMModuleRef module = LLVMGetGlobalParent(
3958          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
3959       LLVMDumpModule(module);
3960 
3961    }
3962    lp_exec_mask_fini(&bld.exec_mask);
3963 }
3964