1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007-2008 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
31  * TGSI to LLVM IR translation -- SoA.
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  *
35  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36  * Brian Paul, and others.
37  */
38 
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71 
72 #define DUMP_GS_EMITS 0
73 
74 /*
75  * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76  * instruction.
77  *
78  * TODO:
79  * - take execution masks into consideration
80  * - debug control-flow instructions
81  */
82 #define DEBUG_EXECUTION 0
83 
84 
85 /*
86  * Emit code to print a register value.
87  */
88 static void
89 emit_dump_reg(struct gallivm_state *gallivm,
90               unsigned file,
91               unsigned index,
92               unsigned chan,
93               LLVMValueRef value)
94 {
95    char buf[32];
96 
97    snprintf(buf, sizeof buf, "    %s[%u].%c = ",
98             tgsi_file_name(file),
99             index, "xyzw"[chan]);
100 
101    lp_build_print_value(gallivm, buf, value);
102 }
103 
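/*
 * Get the context of the innermost function on the function call stack.
 */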
104 static inline struct function_ctx *
105 func_ctx(struct lp_exec_mask *mask)
106 {
107    assert(mask->function_stack_size > 0);
108    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109    return &mask->function_stack[mask->function_stack_size - 1];
110 }
111 
112 /*
113  * Combine the execution mask, if there is one, with the current mask.
114  */
115 static LLVMValueRef
116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120    struct lp_exec_mask *exec_mask = &bld->exec_mask;
121    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122    if (!exec_mask->has_mask) {
123       return bld_mask;
124    }
125    if (!bld_mask)
126       return exec_mask->exec_mask;
127    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128                        exec_mask->exec_mask, "");
129 }
130 
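/*
 * Handle a TGSI BRK inside a loop or switch. A break immediately followed
 * by CASE or ENDSWITCH applies to all active channels, which lp_exec_break
 * can handle more cheaply.
 */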
131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132                           struct lp_build_tgsi_context * bld_base)
133 {
134    enum tgsi_opcode opcode =
135       bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136    bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137                         opcode == TGSI_OPCODE_CASE);
138    lp_exec_break(mask, &bld_base->pc, break_always);
139 }
140 
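/*
 * Begin a switch statement: push the current switch state onto the switch
 * stack and start with an all-zero switch mask (no case active yet).
 */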
141 static void lp_exec_switch(struct lp_exec_mask *mask,
142                            LLVMValueRef switchval)
143 {
144    struct function_ctx *ctx = func_ctx(mask);
145 
146    if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
147        ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
148       ctx->switch_stack_size++;
149       return;
150    }
151 
152    ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
153       ctx->break_type;
154    ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
155 
156    ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
157    ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
158    ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
159    ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
160    ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
161    ctx->switch_stack_size++;
162 
163    mask->switch_mask = LLVMConstNull(mask->int_vec_type);
164    ctx->switch_val = switchval;
165    ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
166    ctx->switch_in_default = false;
167    ctx->switch_pc = 0;
168 
169    lp_exec_mask_update(mask);
170 }
171 
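/*
 * End a switch statement. If a default was deferred (recorded in switch_pc),
 * jump back and execute it with the proper mask before popping the switch
 * state off the stack.
 */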
172 static void lp_exec_endswitch(struct lp_exec_mask *mask,
173                               struct lp_build_tgsi_context * bld_base)
174 {
175    LLVMBuilderRef builder = mask->bld->gallivm->builder;
176    struct function_ctx *ctx = func_ctx(mask);
177 
178    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
179       ctx->switch_stack_size--;
180       return;
181    }
182 
183    /* check if there's a deferred default; if so, do it now */
184    if (ctx->switch_pc && !ctx->switch_in_default) {
185       LLVMValueRef prevmask, defaultmask;
186       unsigned tmp_pc;
187       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
188       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
189       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
190       ctx->switch_in_default = true;
191 
192       lp_exec_mask_update(mask);
193 
194       assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
195              TGSI_OPCODE_DEFAULT);
196 
197       tmp_pc = bld_base->pc;
198       bld_base->pc = ctx->switch_pc;
199       /*
200        * re-purpose switch_pc to point to here again, since we stop execution of
201        * the deferred default after the next break.
202        */
203       ctx->switch_pc = tmp_pc - 1;
204 
205       return;
206    }
207 
208    else if (ctx->switch_pc && ctx->switch_in_default) {
209       assert(bld_base->pc == ctx->switch_pc + 1);
210    }
211 
212    ctx->switch_stack_size--;
213    mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
214    ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
215    ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
216    ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
217    ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
218 
219    ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
220 
221    lp_exec_mask_update(mask);
222 }
223 
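/*
 * Handle a CASE statement: enable the channels whose switch value matches
 * caseval (in addition to those already falling through), and accumulate
 * the match into the default mask.
 */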
224 static void lp_exec_case(struct lp_exec_mask *mask,
225                          LLVMValueRef caseval)
226 {
227    LLVMBuilderRef builder = mask->bld->gallivm->builder;
228    struct function_ctx *ctx = func_ctx(mask);
229 
230    LLVMValueRef casemask, prevmask;
231 
232    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
233       return;
234    }
235 
236    /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
237    if (!ctx->switch_in_default) {
238       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
239       casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
240       ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
241                                              ctx->switch_mask_default, "sw_default_mask");
242       casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
243       mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
244 
245       lp_exec_mask_update(mask);
246    }
247 }
248 
249 /*
250  * Analyse the default statement in a switch.
251  * \return true if default is the last statement, false otherwise
252  * \param default_pc_start receives the pc of the instruction to jump to
253  *                         if default wasn't last but there's no
254  *                         fallthrough into default.
255  */
256 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
257                                        struct lp_build_tgsi_context * bld_base,
258                                        int *default_pc_start)
259 {
260    unsigned pc = bld_base->pc;
261    struct function_ctx *ctx = func_ctx(mask);
262    int curr_switch_stack = ctx->switch_stack_size;
263 
264    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265       return false;
266    }
267 
268    /* skip over case statements which are together with default */
269    while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270       pc++;
271    }
272 
273    while (pc != ~0u && pc < bld_base->num_instructions) {
274       enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275       switch (opcode) {
276       case TGSI_OPCODE_CASE:
277          if (curr_switch_stack == ctx->switch_stack_size) {
278             *default_pc_start = pc - 1;
279             return false;
280          }
281          break;
282       case TGSI_OPCODE_SWITCH:
283          curr_switch_stack++;
284          break;
285       case TGSI_OPCODE_ENDSWITCH:
286          if (curr_switch_stack == ctx->switch_stack_size) {
287             *default_pc_start = pc - 1;
288             return true;
289          }
290          curr_switch_stack--;
291          break;
292       default:
293          ; /* nothing */
294       }
295       pc++;
296    }
297    /* should never arrive here */
298    assert(0);
299    return true;
300 }
301 
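/*
 * Handle a DEFAULT statement. If default is the last statement in the switch
 * the mask can be updated directly; otherwise execution of the default body
 * is deferred until ENDSWITCH (see the comments below for the details).
 */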
302 static void lp_exec_default(struct lp_exec_mask *mask,
303                             struct lp_build_tgsi_context * bld_base)
304 {
305    LLVMBuilderRef builder = mask->bld->gallivm->builder;
306    struct function_ctx *ctx = func_ctx(mask);
307 
308    int default_exec_pc;
309    boolean default_is_last;
310 
311    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
312       return;
313    }
314 
315    /*
316     * This is a messy opcode, because it may not always be at the end and
317     * there can be fallthrough in and out of it.
318     */
319 
320    default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
321    /*
322     * If it is the last statement in the switch (note that case statements appearing
323     * "at the same time" as default don't change that), everything is just fine:
324     * update the switch mask and go on. This means we can handle default with
325     * fallthrough INTO it without overhead, if it is last.
326     */
327    if (default_is_last) {
328       LLVMValueRef prevmask, defaultmask;
329       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
330       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
331       defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
332       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
333       ctx->switch_in_default = true;
334 
335       lp_exec_mask_update(mask);
336    }
337    else {
338       /*
339        * Technically, "case" immediately before default isn't really a
340        * fallthrough, however we still have to count it as such since we
341        * have already updated the masks.
342        * If that happens in practice we could add a switch optimizer pass
343        * which just gets rid of all case statements appearing together with
344        * default (or could do switch analysis at switch start time instead).
345        */
346       enum tgsi_opcode opcode =
347          bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
348       boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
349                          opcode != TGSI_OPCODE_SWITCH);
350       /*
351        * If it is not the last statement and there was no fallthrough into it,
352        * we record the PC and continue execution at the next case (again, cases
353        * encountered at the same time don't count). At endswitch
354        * time, we update the switch mask and go back executing the code we skipped
355        * until the next break (possibly re-executing some code with changed mask
356        * if there was a fallthrough out of default).
357        * Finally, if it is not the last statement and there was a fallthrough into it,
358        * do the same as in the former case, except instead of skipping the code
359        * just execute it without updating the mask, then go back and re-execute.
360        */
361       ctx->switch_pc = bld_base->pc;
362       if (!ft_into) {
363          bld_base->pc = default_exec_pc;
364       }
365    }
366 }
367 
368 
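/*
 * Handle a CAL instruction: push the return pc and return mask onto the
 * function stack and transfer control to the callee.
 */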
369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370                               int func,
371                               int *pc)
372 {
373    if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374       return;
375    }
376 
377    lp_exec_mask_function_init(mask, mask->function_stack_size);
378    mask->function_stack[mask->function_stack_size].pc = *pc;
379    mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380    mask->function_stack_size++;
381    *pc = func;
382 }
383 
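/*
 * Handle a RET instruction: either return directly from main (when not
 * inside any control flow), or mask off the returning channels.
 */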
384 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385 {
386    LLVMBuilderRef builder = mask->bld->gallivm->builder;
387    struct function_ctx *ctx = func_ctx(mask);
388    LLVMValueRef exec_mask;
389 
390    if (ctx->cond_stack_size == 0 &&
391        ctx->loop_stack_size == 0 &&
392        ctx->switch_stack_size == 0 &&
393        mask->function_stack_size == 1) {
394       /* returning from main() */
395       *pc = -1;
396       return;
397    }
398 
399    if (mask->function_stack_size == 1) {
400       /*
401        * This requires special handling since we need to ensure
402        * we don't drop the mask even if we have no call stack
403        * (e.g. after a ret in an if clause after the endif)
404        */
405       mask->ret_in_main = TRUE;
406    }
407 
408    exec_mask = LLVMBuildNot(builder,
409                             mask->exec_mask,
410                             "ret");
411 
412    mask->ret_mask = LLVMBuildAnd(builder,
413                                  mask->ret_mask,
414                                  exec_mask, "ret_full");
415 
416    lp_exec_mask_update(mask);
417 }
418 
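/* Nothing to do at BGNSUB; the function stack is managed at CAL/ENDSUB time. */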
419 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
420 {
421 }
422 
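/*
 * Handle ENDSUB: pop the function stack and restore the caller's pc and
 * return mask.
 */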
423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425    struct function_ctx *ctx;
426 
427    assert(mask->function_stack_size > 1);
428    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429 
430    ctx = func_ctx(mask);
431    mask->function_stack_size--;
432 
433    *pc = ctx->pc;
434    mask->ret_mask = ctx->ret_mask;
435 
436    lp_exec_mask_update(mask);
437 }
438 
439 
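/**
 * Return a pointer to the given channel of a TEMP or OUTPUT register,
 * using either the per-register allocas or the indirectly addressable
 * array, depending on how the file was declared.
 */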
440 static LLVMValueRef
441 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442              unsigned file,
443              int index,
444              unsigned chan)
445 {
446    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447    LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448    LLVMValueRef var_of_array;
449 
450    switch (file) {
451    case TGSI_FILE_TEMPORARY:
452       array_of_vars = bld->temps;
453       var_of_array = bld->temps_array;
454       break;
455    case TGSI_FILE_OUTPUT:
456       array_of_vars = bld->outputs;
457       var_of_array = bld->outputs_array;
458       break;
459    default:
460       assert(0);
461       return NULL;
462    }
463 
464    assert(chan < 4);
465 
466    if (bld->indirect_files & (1 << file)) {
467       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
468       if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
469          LLVMValueRef gep[2];
470          gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
471          gep[1] = lindex;
472          return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
473       } else {
474          return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
475       }
476    }
477    else {
478       assert(index <= bld->bld_base.info->file_max[file]);
479       return array_of_vars[index][chan];
480    }
481 }
482 
483 
484 /**
485  * Return pointer to a temporary register channel (src or dest).
486  * Note that indirect addressing cannot be handled here.
487  * \param index  which temporary register
488  * \param chan  which channel of the temp register.
489  */
490 LLVMValueRef
491 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
492              unsigned index,
493              unsigned chan)
494 {
495    return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
496 }
497 
498 /**
499  * Return pointer to an output register channel (src or dest).
500  * Note that indirect addressing cannot be handled here.
501  * \param index  which output register
502  * \param chan  which channel of the output register.
503  */
504 LLVMValueRef
505 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
506                unsigned index,
507                unsigned chan)
508 {
509    return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
510 }
511 
512 /*
513  * If we have indirect addressing in outputs, copy our alloca array
514  * to the output slots specified by the caller to make sure
515  * our outputs are delivered consistently via the same interface.
516  */
517 static void
518 gather_outputs(struct lp_build_tgsi_soa_context * bld)
519 {
520    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
521       unsigned index, chan;
522       assert(bld->bld_base.info->num_outputs <=
523              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
524       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
525          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
526             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
527          }
528       }
529    }
530 }
531 
532 /**
533  * Gather vector.
534  * XXX the lp_build_gather() function should be capable of doing this
535  * with a little work.
536  */
537 static LLVMValueRef
538 build_gather(struct lp_build_tgsi_context *bld_base,
539              LLVMValueRef base_ptr,
540              LLVMValueRef indexes,
541              LLVMValueRef overflow_mask,
542              LLVMValueRef indexes2)
543 {
544    struct gallivm_state *gallivm = bld_base->base.gallivm;
545    LLVMBuilderRef builder = gallivm->builder;
546    struct lp_build_context *uint_bld = &bld_base->uint_bld;
547    struct lp_build_context *bld = &bld_base->base;
548    LLVMValueRef res;
549    unsigned i;
550 
551    if (indexes2)
552       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
553    else
554       res = bld->undef;
555    /*
556     * overflow_mask is a vector telling us which channels
557     * in the vector overflowed. We use the overflow behavior for
558     * constant buffers which is defined as:
559     * Out of bounds access to constant buffer returns 0 in all
560     * components. Out of bounds behavior is always with respect
561     * to the size of the buffer bound at that slot.
562     */
563 
564    if (overflow_mask) {
565       /*
566        * We avoid per-element control flow here (also due to llvm going crazy,
567        * though I suspect it's better anyway since overflow is likely rare).
568        * Note that since we still fetch from buffers even if num_elements was
569        * zero (in this case we'll fetch from index zero) the jit func callers
570        * MUST provide valid fake constant buffers of size 4x32 (the values do
571        * not matter), otherwise we'd still need (not per element though)
572        * control flow.
573        */
574       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
575       if (indexes2)
576          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
577    }
578 
579    /*
580     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
581     */
582    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
583       LLVMValueRef si, di;
584       LLVMValueRef index;
585       LLVMValueRef scalar_ptr, scalar;
586 
587       di = lp_build_const_int32(bld->gallivm, i);
588       if (indexes2)
589          si = lp_build_const_int32(bld->gallivm, i >> 1);
590       else
591          si = di;
592 
593       if (indexes2 && (i & 1)) {
594          index = LLVMBuildExtractElement(builder,
595                                          indexes2, si, "");
596       } else {
597          index = LLVMBuildExtractElement(builder,
598                                          indexes, si, "");
599       }
600       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
601                                 &index, 1, "gather_ptr");
602       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
603 
604       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
605    }
606 
607    if (overflow_mask) {
608       if (indexes2) {
609          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
610          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
611                                        bld_base->dbl_bld.int_vec_type, "");
612          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
613                                bld_base->dbl_bld.zero, res);
614       } else
615          res = lp_build_select(bld, overflow_mask, bld->zero, res);
616    }
617 
618    return res;
619 }
620 
621 
622 /**
623  * Scatter/store vector.
624  */
625 static void
626 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
627                   LLVMValueRef base_ptr,
628                   LLVMValueRef indexes,
629                   LLVMValueRef values,
630                   struct lp_exec_mask *mask)
631 {
632    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
633    LLVMBuilderRef builder = gallivm->builder;
634    unsigned i;
635    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
636 
637    /*
638     * Loop over elements of index_vec, store scalar value.
639     */
640    for (i = 0; i < bld->bld_base.base.type.length; i++) {
641       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
642       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
643       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
644       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
645       LLVMValueRef scalar_pred = pred ?
646          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
647 
648       if (0)
649          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
650                          ii, val, index, scalar_ptr);
651 
652       if (scalar_pred) {
653          LLVMValueRef real_val, dst_val;
654          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
655          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
656          LLVMBuildStore(builder, real_val, scalar_ptr);
657       }
658       else {
659          LLVMBuildStore(builder, val, scalar_ptr);
660       }
661    }
662 }
663 
664 
665 /**
666  * Read the current value of the ADDR register, convert the floats to
667  * ints, add the base index and return the vector of offsets.
668  * The offsets will be used to index into the constant buffer or
669  * temporary register file.
670  */
671 static LLVMValueRef
672 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
673                    unsigned reg_file, unsigned reg_index,
674                    const struct tgsi_ind_register *indirect_reg,
675                    int index_limit)
676 {
677    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
678    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
679    /* use the component selected by the indirect register's swizzle (normally X) */
680    unsigned swizzle = indirect_reg->Swizzle;
681    LLVMValueRef base;
682    LLVMValueRef rel;
683    LLVMValueRef max_index;
684    LLVMValueRef index;
685 
686    assert(bld->indirect_files & (1 << reg_file));
687 
688    base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
689 
690    assert(swizzle < 4);
691    switch (indirect_reg->File) {
692    case TGSI_FILE_ADDRESS:
693       rel = LLVMBuildLoad(builder,
694                           bld->addr[indirect_reg->Index][swizzle],
695                           "load addr reg");
696       /* ADDR LLVM values already have LLVM integer type. */
697       break;
698    case TGSI_FILE_TEMPORARY:
699       rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
700       rel = LLVMBuildLoad(builder, rel, "load temp reg");
701       /* TEMP LLVM values always have LLVM float type, but for indirection, the
702        * value actually stored is expected to be an integer */
703       rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
704       break;
705    default:
706       assert(0);
707       rel = uint_bld->zero;
708    }
709 
710    index = lp_build_add(uint_bld, base, rel);
711 
712    /*
713     * emit_fetch_constant handles constant buffer overflow so this code
714     * is pointless for them.
715     * Furthermore the D3D10 spec in section 6.5 says:
716     * If the constant buffer bound to a slot is larger than the size
717     * declared in the shader for that slot, implementations are allowed
718     * to return incorrect data (not necessarily 0) for indices that are
719     * larger than the declared size but smaller than the buffer size.
720     */
721    if (reg_file != TGSI_FILE_CONSTANT) {
722       assert(index_limit >= 0);
723       max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
724                                          uint_bld->type, index_limit);
725 
726       assert(!uint_bld->type.sign);
727       index = lp_build_min(uint_bld, index, max_index);
728    }
729 
730    return index;
731 }
732 
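/**
 * Return the build context matching the TGSI type of the value being fetched.
 */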
733 static struct lp_build_context *
734 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
735 	       enum tgsi_opcode_type stype)
736 {
737    struct lp_build_context *bld_fetch;
738 
739    switch (stype) {
740    case TGSI_TYPE_FLOAT:
741    case TGSI_TYPE_UNTYPED:
742       bld_fetch = &bld_base->base;
743       break;
744    case TGSI_TYPE_UNSIGNED:
745       bld_fetch = &bld_base->uint_bld;
746       break;
747    case TGSI_TYPE_SIGNED:
748       bld_fetch = &bld_base->int_bld;
749       break;
750    case TGSI_TYPE_DOUBLE:
751       bld_fetch = &bld_base->dbl_bld;
752       break;
753    case TGSI_TYPE_UNSIGNED64:
754       bld_fetch = &bld_base->uint64_bld;
755       break;
756    case TGSI_TYPE_SIGNED64:
757       bld_fetch = &bld_base->int64_bld;
758       break;
759    case TGSI_TYPE_VOID:
760    default:
761       assert(0);
762       bld_fetch = NULL;
763       break;
764    }
765    return bld_fetch;
766 }
767 
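/**
 * Compute per-element offsets into an SoA register array:
 * (indirect_index * 4 + chan_index) * vector_length, optionally plus the
 * per-element offsets {0, 1, 2, ...}.
 */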
768 static LLVMValueRef
769 get_soa_array_offsets(struct lp_build_context *uint_bld,
770                       LLVMValueRef indirect_index,
771                       unsigned chan_index,
772                       boolean need_perelement_offset)
773 {
774    struct gallivm_state *gallivm = uint_bld->gallivm;
775    LLVMValueRef chan_vec =
776       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
777    LLVMValueRef length_vec =
778       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
779    LLVMValueRef index_vec;
780 
781    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
782    index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
783    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
784    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
785 
786    if (need_perelement_offset) {
787       LLVMValueRef pixel_offsets;
788       unsigned i;
789      /* build pixel offset vector: {0, 1, 2, 3, ...} */
790       pixel_offsets = uint_bld->undef;
791       for (i = 0; i < uint_bld->type.length; i++) {
792          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
793          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
794                                                 ii, ii, "");
795       }
796       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
797    }
798    return index_vec;
799 }
800 
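/**
 * Fetch a value from the constant buffer, either as a scalar broadcast to
 * a vector (direct addressing) or via a gather (indirect addressing, with
 * out-of-bounds reads returning zero).
 */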
801 static LLVMValueRef
802 emit_fetch_constant(
803    struct lp_build_tgsi_context * bld_base,
804    const struct tgsi_full_src_register * reg,
805    enum tgsi_opcode_type stype,
806    unsigned swizzle_in)
807 {
808    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
809    struct gallivm_state *gallivm = bld_base->base.gallivm;
810    LLVMBuilderRef builder = gallivm->builder;
811    struct lp_build_context *uint_bld = &bld_base->uint_bld;
812    unsigned dimension = 0;
813    LLVMValueRef consts_ptr;
814    LLVMValueRef num_consts;
815    LLVMValueRef res;
816    unsigned swizzle = swizzle_in & 0xffff;
817 
818    /* XXX: Handle fetching xyzw components as a vector */
819    assert(swizzle != ~0u);
820 
821    if (reg->Register.Dimension) {
822       assert(!reg->Dimension.Indirect);
823       dimension = reg->Dimension.Index;
824       assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
825    }
826 
827    consts_ptr = bld->consts[dimension];
828    num_consts = bld->consts_sizes[dimension];
829 
830    if (reg->Register.Indirect) {
831       LLVMValueRef indirect_index;
832       LLVMValueRef swizzle_vec =
833          lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
834       LLVMValueRef index_vec;  /* index into the const buffer */
835       LLVMValueRef overflow_mask;
836       LLVMValueRef index_vec2 = NULL;
837 
838       indirect_index = get_indirect_index(bld,
839                                           reg->Register.File,
840                                           reg->Register.Index,
841                                           &reg->Indirect,
842                                           bld->bld_base.info->file_max[reg->Register.File]);
843 
844       /* All fetches are from the same constant buffer, so
845        * we need to propagate the size to a vector to do a
846        * vector comparison */
847       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
848       /* Construct a boolean vector telling us which channels
849        * overflow the bound constant buffer */
850       overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
851                                        indirect_index, num_consts);
852 
853       /* index_vec = indirect_index * 4 + swizzle */
854       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
855       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
856 
857       if (tgsi_type_is_64bit(stype)) {
858          LLVMValueRef swizzle_vec2;
859          swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
860          index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
861          index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
862       }
863       /* Gather values from the constant buffer */
864       res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
865    }
866    else {
867       LLVMValueRef index;  /* index into the const buffer */
868       LLVMValueRef scalar, scalar_ptr;
869       struct lp_build_context *bld_broad = &bld_base->base;
870       index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
871 
872       scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
873                                 &index, 1, "");
874 
875       if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
876 
877          LLVMValueRef scalar2, scalar2_ptr;
878          LLVMValueRef shuffles[2];
879          index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
880 
881          scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
882                                     &index, 1, "");
883 
884          scalar = LLVMBuildLoad(builder, scalar_ptr, "");
885          scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
886          shuffles[0] = lp_build_const_int32(gallivm, 0);
887          shuffles[1] = lp_build_const_int32(gallivm, 1);
888 
889          res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
890          res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
891          res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
892       } else {
893         if (stype == TGSI_TYPE_DOUBLE) {
894            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
895            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
896            bld_broad = &bld_base->dbl_bld;
897         } else if (stype == TGSI_TYPE_UNSIGNED64) {
898            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
899            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
900            bld_broad = &bld_base->uint64_bld;
901         } else if (stype == TGSI_TYPE_SIGNED64) {
902            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
903            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
904            bld_broad = &bld_base->int64_bld;
905         }
906         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
907         res = lp_build_broadcast_scalar(bld_broad, scalar);
908       }
909 
910    }
911 
912    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
913       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
914       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
915    }
916 
917    return res;
918 }
919 
920 /**
921  * Fetch 64-bit values from two separate channels.
922  * 64-bit values are stored split across two channels, like xy and zw.
923  * This function creates a set of vec_length*2 floats,
924  * extracts the values from the two channels,
925  * puts them in the correct place, then casts to vec_length 64-bits.
926  */
927 static LLVMValueRef
928 emit_fetch_64bit(
929    struct lp_build_tgsi_context * bld_base,
930    enum tgsi_opcode_type stype,
931    LLVMValueRef input,
932    LLVMValueRef input2)
933 {
934    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
935    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
936    LLVMBuilderRef builder = gallivm->builder;
937    LLVMValueRef res;
938    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
939    int i;
940    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
941    int len = bld_base->base.type.length * 2;
942    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
943 
944    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
945       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
946       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
947    }
948    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
949 
950    return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
951 }
952 
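/**
 * Fetch a value from the immediate register file, either from the
 * immediates array alloca (when indirect addressing or an immediates array
 * is used) or directly from the per-register values.
 */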
953 static LLVMValueRef
954 emit_fetch_immediate(
955    struct lp_build_tgsi_context * bld_base,
956    const struct tgsi_full_src_register * reg,
957    enum tgsi_opcode_type stype,
958    unsigned swizzle_in)
959 {
960    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
961    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
962    LLVMBuilderRef builder = gallivm->builder;
963    LLVMValueRef res = NULL;
964    unsigned swizzle = swizzle_in & 0xffff;
965 
966    if (bld->use_immediates_array || reg->Register.Indirect) {
967       LLVMValueRef imms_array;
968       LLVMTypeRef fptr_type;
969 
970       /* cast imms_array pointer to float* */
971       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
972       imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
973 
974       if (reg->Register.Indirect) {
975          LLVMValueRef indirect_index;
976          LLVMValueRef index_vec;  /* index into the immediate register array */
977          LLVMValueRef index_vec2 = NULL;
978          indirect_index = get_indirect_index(bld,
979                                              reg->Register.File,
980                                              reg->Register.Index,
981                                              &reg->Indirect,
982                                              bld->bld_base.info->file_max[reg->Register.File]);
983          /*
984           * Unlike for other reg classes, adding pixel offsets is unnecessary -
985           * immediates are stored as full vectors (FIXME??? - might be better
986           * to store them the same as constants) but all elements are the same
987           * in any case.
988           */
989          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
990                                            indirect_index,
991                                            swizzle,
992                                            FALSE);
993          if (tgsi_type_is_64bit(stype))
994             index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
995                                               indirect_index,
996                                               swizzle_in >> 16,
997                                               FALSE);
998          /* Gather values from the immediate register array */
999          res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1000       } else {
1001          LLVMValueRef gep[2];
1002          gep[0] = lp_build_const_int32(gallivm, 0);
1003          gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1004          LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1005                                               bld->imms_array, gep, 2, "");
1006          res = LLVMBuildLoad(builder, imms_ptr, "");
1007 
1008          if (tgsi_type_is_64bit(stype)) {
1009             LLVMValueRef imms_ptr2;
1010             LLVMValueRef res2;
1011             gep[1] = lp_build_const_int32(gallivm,
1012                                           reg->Register.Index * 4 + (swizzle_in >> 16));
1013             imms_ptr2 = LLVMBuildGEP(builder,
1014                                      bld->imms_array, gep, 2, "");
1015             res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1016             res = emit_fetch_64bit(bld_base, stype, res, res2);
1017          }
1018       }
1019    }
1020    else {
1021       res = bld->immediates[reg->Register.Index][swizzle];
1022       if (tgsi_type_is_64bit(stype))
1023          res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1024    }
1025 
1026    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1027       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1028       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1029    }
1030    return res;
1031 }
1032 
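/**
 * Fetch a value from the input register file, handling both direct and
 * indirect addressing as well as 64-bit types split across two channels.
 */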
1033 static LLVMValueRef
1034 emit_fetch_input(
1035    struct lp_build_tgsi_context * bld_base,
1036    const struct tgsi_full_src_register * reg,
1037    enum tgsi_opcode_type stype,
1038    unsigned swizzle_in)
1039 {
1040    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1041    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1042    LLVMBuilderRef builder = gallivm->builder;
1043    LLVMValueRef res;
1044    unsigned swizzle = swizzle_in & 0xffff;
1045 
1046    if (reg->Register.Indirect) {
1047       LLVMValueRef indirect_index;
1048       LLVMValueRef index_vec;  /* index into the input reg array */
1049       LLVMValueRef index_vec2 = NULL;
1050       LLVMValueRef inputs_array;
1051       LLVMTypeRef fptr_type;
1052 
1053       indirect_index = get_indirect_index(bld,
1054                                           reg->Register.File,
1055                                           reg->Register.Index,
1056                                           &reg->Indirect,
1057                                           bld->bld_base.info->file_max[reg->Register.File]);
1058 
1059       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1060                                         indirect_index,
1061                                         swizzle,
1062                                         TRUE);
1063       if (tgsi_type_is_64bit(stype)) {
1064          index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1065                                            indirect_index,
1066                                            swizzle_in >> 16,
1067                                            TRUE);
1068       }
1069       /* cast inputs_array pointer to float* */
1070       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1071       inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1072 
1073       /* Gather values from the input register array */
1074       res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1075    } else {
1076       if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1077          LLVMValueRef lindex = lp_build_const_int32(gallivm,
1078                                         reg->Register.Index * 4 + swizzle);
1079          LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1080                                                bld->inputs_array, &lindex, 1, "");
1081 
1082          res = LLVMBuildLoad(builder, input_ptr, "");
1083          if (tgsi_type_is_64bit(stype)) {
1084             LLVMValueRef lindex1;
1085             LLVMValueRef input_ptr2;
1086             LLVMValueRef res2;
1087 
1088             lindex1 = lp_build_const_int32(gallivm,
1089                                            reg->Register.Index * 4 + (swizzle_in >> 16));
1090             input_ptr2 = LLVMBuildGEP(builder,
1091                                       bld->inputs_array, &lindex1, 1, "");
1092             res2 = LLVMBuildLoad(builder, input_ptr2, "");
1093             res = emit_fetch_64bit(bld_base, stype, res, res2);
1094          }
1095       }
1096       else {
1097          res = bld->inputs[reg->Register.Index][swizzle];
1098          if (tgsi_type_is_64bit(stype))
1099             res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1100       }
1101    }
1102 
1103    assert(res);
1104 
1105    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1106       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1107       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1108    }
1109 
1110    return res;
1111 }
1112 
1113 
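/**
 * Fetch a geometry shader input via the gs_iface callback, given the
 * (possibly indirect) vertex and attribute indices.
 */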
1114 static LLVMValueRef
1115 emit_fetch_gs_input(
1116    struct lp_build_tgsi_context * bld_base,
1117    const struct tgsi_full_src_register * reg,
1118    enum tgsi_opcode_type stype,
1119    unsigned swizzle_in)
1120 {
1121    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1122    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1123    const struct tgsi_shader_info *info = bld->bld_base.info;
1124    LLVMBuilderRef builder = gallivm->builder;
1125    LLVMValueRef attrib_index = NULL;
1126    LLVMValueRef vertex_index = NULL;
1127    unsigned swizzle = swizzle_in & 0xffff;
1128    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1129    LLVMValueRef res;
1130 
1131    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1132       /* This is really a system value not a regular input */
1133       assert(!reg->Register.Indirect);
1134       assert(!reg->Dimension.Indirect);
1135       res = bld->system_values.prim_id;
1136       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1137          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1138       }
1139       return res;
1140    }
1141 
1142    if (reg->Register.Indirect) {
1143       /*
1144        * XXX: this is possibly not quite the right value, since file_max may be
1145        * larger than the max attrib index, due to it being the max of declared
1146        * inputs AND the max vertices per prim (which is 6 for tri adj).
1147        * It should however be safe to use (since we always allocate
1148        * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1149        */
1150       int index_limit = info->file_max[reg->Register.File];
1151       attrib_index = get_indirect_index(bld,
1152                                         reg->Register.File,
1153                                         reg->Register.Index,
1154                                         &reg->Indirect,
1155                                         index_limit);
1156    } else {
1157       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1158    }
1159 
1160    if (reg->Dimension.Indirect) {
1161       /*
1162        * A fixed 6 should do as well (which is what we allocate).
1163        */
1164       int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1165       vertex_index = get_indirect_index(bld,
1166                                         reg->Register.File,
1167                                         reg->Dimension.Index,
1168                                         &reg->DimIndirect,
1169                                         index_limit);
1170    } else {
1171       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1172    }
1173 
1174    res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1175                                     reg->Dimension.Indirect,
1176                                     vertex_index,
1177                                     reg->Register.Indirect,
1178                                     attrib_index,
1179                                     swizzle_index);
1180 
1181    assert(res);
1182    if (tgsi_type_is_64bit(stype)) {
1183       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1184       LLVMValueRef res2;
1185       res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1186                                         reg->Dimension.Indirect,
1187                                         vertex_index,
1188                                         reg->Register.Indirect,
1189                                         attrib_index,
1190                                         swizzle_index);
1191       assert(res2);
1192       res = emit_fetch_64bit(bld_base, stype, res, res2);
1193    } else if (stype == TGSI_TYPE_UNSIGNED) {
1194       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1195    } else if (stype == TGSI_TYPE_SIGNED) {
1196       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1197    }
1198 
1199    return res;
1200 }
1201 
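/**
 * Fetch a tessellation control shader input (or one of its own outputs)
 * via the tcs_iface callbacks.
 */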
1202 static LLVMValueRef
1203 emit_fetch_tcs_input(
1204    struct lp_build_tgsi_context * bld_base,
1205    const struct tgsi_full_src_register * reg,
1206    enum tgsi_opcode_type stype,
1207    unsigned swizzle_in)
1208 {
1209    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1210    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1211    const struct tgsi_shader_info *info = bld->bld_base.info;
1212    LLVMBuilderRef builder = gallivm->builder;
1213    LLVMValueRef attrib_index = NULL;
1214    LLVMValueRef vertex_index = NULL;
1215    unsigned swizzle = swizzle_in & 0xffff;
1216    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1217    LLVMValueRef res;
1218 
1219    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1220       /* This is really a system value not a regular input */
1221       assert(!reg->Register.Indirect);
1222       assert(!reg->Dimension.Indirect);
1223       res = bld->system_values.prim_id;
1224       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1225          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1226       }
1227       return res;
1228    }
1229 
1230    if (reg->Register.Indirect) {
1231       int index_limit = info->file_max[reg->Register.File];
1232       attrib_index = get_indirect_index(bld,
1233                                         reg->Register.File,
1234                                         reg->Register.Index,
1235                                         &reg->Indirect,
1236                                         index_limit);
1237    } else {
1238       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1239    }
1240 
1241    if (reg->Dimension.Indirect) {
1242       vertex_index = get_indirect_index(bld,
1243                                         reg->Register.File,
1244                                         reg->Dimension.Index,
1245                                         &reg->DimIndirect,
1246                                         PIPE_MAX_SHADER_INPUTS);
1247    } else {
1248       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1249    }
1250 
1251    // TCS can read from its own outputs
1252    if (reg->Register.File == TGSI_FILE_OUTPUT) {
1253       res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1254                                               reg->Dimension.Indirect,
1255                                               vertex_index,
1256                                               reg->Register.Indirect,
1257                                               attrib_index,
1258                                               FALSE,
1259                                               swizzle_index,
1260                                               bld_base->info->output_semantic_name[reg->Register.Index]);
1261    } else {
1262       res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1263                                              reg->Dimension.Indirect,
1264                                              vertex_index,
1265                                              reg->Register.Indirect,
1266                                              attrib_index,
1267                                              FALSE,
1268                                              swizzle_index);
1269    }
1270 
1271 
1272    assert(res);
1273    if (tgsi_type_is_64bit(stype)) {
1274       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1275       LLVMValueRef res2;
1276       if (reg->Register.File == TGSI_FILE_OUTPUT) {
1277          res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1278                                                   reg->Dimension.Indirect,
1279                                                   vertex_index,
1280                                                   reg->Register.Indirect,
1281                                                   attrib_index,
1282                                                   FALSE,
1283                                                   swizzle_index,
1284                                                   bld_base->info->output_semantic_name[reg->Register.Index]);
1285       } else {
1286          res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1287                                                  reg->Dimension.Indirect,
1288                                                  vertex_index,
1289                                                  reg->Register.Indirect,
1290                                                  attrib_index,
1291                                                  FALSE,
1292                                                  swizzle_index);
1293       }
1294       assert(res2);
1295       res = emit_fetch_64bit(bld_base, stype, res, res2);
1296    } else if (stype == TGSI_TYPE_UNSIGNED) {
1297       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1298    } else if (stype == TGSI_TYPE_SIGNED) {
1299       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1300    }
1301 
1302    return res;
1303 }
1304 
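/**
 * Register fetch for TES (tessellation evaluation) inputs.
 *
 * Patch inputs go through tes_iface->fetch_patch_input, per-vertex
 * inputs through tes_iface->fetch_vertex_input; PRIMID is handled as
 * a system value.
 */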
1305 static LLVMValueRef
1306 emit_fetch_tes_input(
1307    struct lp_build_tgsi_context * bld_base,
1308    const struct tgsi_full_src_register * reg,
1309    enum tgsi_opcode_type stype,
1310    unsigned swizzle_in)
1311 {
1312    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1313    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1314    const struct tgsi_shader_info *info = bld->bld_base.info;
1315    LLVMBuilderRef builder = gallivm->builder;
1316    LLVMValueRef attrib_index = NULL;
1317    LLVMValueRef vertex_index = NULL;
1318    unsigned swizzle = swizzle_in & 0xffff;
1319    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1320    LLVMValueRef res;
1321 
1322    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1323       /* This is really a system value, not a regular input. */
1324       assert(!reg->Register.Indirect);
1325       assert(!reg->Dimension.Indirect);
1326       res = bld->system_values.prim_id;
1327       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1328          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1329       }
1330       return res;
1331    }
1332 
1333    if (reg->Register.Indirect) {
1334       int index_limit = info->file_max[reg->Register.File];
1335       attrib_index = get_indirect_index(bld,
1336                                         reg->Register.File,
1337                                         reg->Register.Index,
1338                                         &reg->Indirect,
1339                                         index_limit);
1340    } else {
1341       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1342    }
1343 
1344    if (reg->Dimension.Indirect) {
1345       vertex_index = get_indirect_index(bld,
1346                                         reg->Register.File,
1347                                         reg->Dimension.Index,
1348                                         &reg->DimIndirect,
1349                                         PIPE_MAX_SHADER_INPUTS);
1350    } else {
1351       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1352    }
1353 
1354    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1355       res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1356                                      reg->Register.Indirect,
1357                                      attrib_index,
1358                                      swizzle_index);
1359    } else {
1360       res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1361                                        reg->Dimension.Indirect,
1362                                        vertex_index,
1363                                        reg->Register.Indirect,
1364                                        attrib_index,
1365                                        FALSE,
1366                                        swizzle_index);
1367    }
1368 
1369    assert(res);
1370    if (tgsi_type_is_64bit(stype)) {
1371       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1372       LLVMValueRef res2;
1373       if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1374          res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1375                                     reg->Register.Indirect,
1376                                     attrib_index,
1377                                     swizzle_index);
1378       }
1379       else {
1380          res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1381                                              reg->Dimension.Indirect,
1382                                              vertex_index,
1383                                              reg->Register.Indirect,
1384                                              attrib_index,
1385                                              FALSE,
1386                                              swizzle_index);
1387       }
1388       assert(res2);
1389       res = emit_fetch_64bit(bld_base, stype, res, res2);
1390    } else if (stype == TGSI_TYPE_UNSIGNED) {
1391       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1392    } else if (stype == TGSI_TYPE_SIGNED) {
1393       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1394    }
1395 
1396    return res;
1397 }
1398 
1399 
1400 
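/**
 * Register fetch from the temporary file.
 *
 * Indirectly addressed temporaries are gathered from the temps array,
 * otherwise the value is loaded directly from the per-channel pointer.
 */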
1401 static LLVMValueRef
1402 emit_fetch_temporary(
1403    struct lp_build_tgsi_context * bld_base,
1404    const struct tgsi_full_src_register * reg,
1405    enum tgsi_opcode_type stype,
1406    unsigned swizzle_in)
1407 {
1408    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1409    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1410    LLVMBuilderRef builder = gallivm->builder;
1411    LLVMValueRef res;
1412    unsigned swizzle = swizzle_in & 0xffff;
1413 
1414    if (reg->Register.Indirect) {
1415       LLVMValueRef indirect_index;
1416       LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
1417       LLVMValueRef temps_array;
1418       LLVMTypeRef fptr_type;
1419 
1420       indirect_index = get_indirect_index(bld,
1421                                           reg->Register.File,
1422                                           reg->Register.Index,
1423                                           &reg->Indirect,
1424                                           bld->bld_base.info->file_max[reg->Register.File]);
1425 
1426       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1427                                         indirect_index,
1428                                         swizzle,
1429                                         TRUE);
1430       if (tgsi_type_is_64bit(stype)) {
1431                index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1432                                                   indirect_index,
1433                                                   swizzle_in >> 16,
1434                                                   TRUE);
1435       }
1436 
1437       /* cast temps_array pointer to float* */
1438       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1439       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1440 
1441       /* Gather values from the temporary register array */
1442       res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1443    }
1444    else {
1445       LLVMValueRef temp_ptr;
1446       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1447       res = LLVMBuildLoad(builder, temp_ptr, "");
1448 
1449       if (tgsi_type_is_64bit(stype)) {
1450          LLVMValueRef temp_ptr2, res2;
1451 
1452          temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1453          res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1454          res = emit_fetch_64bit(bld_base, stype, res, res2);
1455       }
1456    }
1457 
1458    if (stype == TGSI_TYPE_SIGNED ||
1459        stype == TGSI_TYPE_UNSIGNED ||
1460        stype == TGSI_TYPE_DOUBLE ||
1461        stype == TGSI_TYPE_SIGNED64 ||
1462        stype == TGSI_TYPE_UNSIGNED64) {
1463       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1464       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1465    }
1466 
1467    return res;
1468 }
1469 
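/**
 * Register fetch for system values (instance id, vertex id, etc.).
 *
 * The value is bitcast at the end if the actual type of the semantic
 * differs from the type expected by the instruction.
 */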
1470 static LLVMValueRef
1471 emit_fetch_system_value(
1472    struct lp_build_tgsi_context * bld_base,
1473    const struct tgsi_full_src_register * reg,
1474    enum tgsi_opcode_type stype,
1475    unsigned swizzle_in)
1476 {
1477    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1478    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1479    const struct tgsi_shader_info *info = bld->bld_base.info;
1480    LLVMBuilderRef builder = gallivm->builder;
1481    LLVMValueRef res;
1482    enum tgsi_opcode_type atype; // Actual type of the value
1483    unsigned swizzle = swizzle_in & 0xffff;
1484 
1485    assert(!reg->Register.Indirect);
1486 
1487    switch (info->system_value_semantic_name[reg->Register.Index]) {
1488    case TGSI_SEMANTIC_INSTANCEID:
1489       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1490       atype = TGSI_TYPE_UNSIGNED;
1491       break;
1492 
1493    case TGSI_SEMANTIC_VERTEXID:
1494       res = bld->system_values.vertex_id;
1495       atype = TGSI_TYPE_UNSIGNED;
1496       break;
1497 
1498    case TGSI_SEMANTIC_VERTEXID_NOBASE:
1499       res = bld->system_values.vertex_id_nobase;
1500       atype = TGSI_TYPE_UNSIGNED;
1501       break;
1502 
1503    case TGSI_SEMANTIC_BASEVERTEX:
1504       res = bld->system_values.basevertex;
1505       atype = TGSI_TYPE_UNSIGNED;
1506       break;
1507 
1508    case TGSI_SEMANTIC_BASEINSTANCE:
1509       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1510       atype = TGSI_TYPE_UNSIGNED;
1511       break;
1512 
1513    case TGSI_SEMANTIC_PRIMID:
1514       res = bld->system_values.prim_id;
1515       atype = TGSI_TYPE_UNSIGNED;
1516       break;
1517 
1518    case TGSI_SEMANTIC_INVOCATIONID:
1519       if (info->processor == PIPE_SHADER_TESS_CTRL)
1520          res = bld->system_values.invocation_id;
1521       else
1522          res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1523       atype = TGSI_TYPE_UNSIGNED;
1524       break;
1525 
1526    case TGSI_SEMANTIC_HELPER_INVOCATION:
1527       res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1528       atype = TGSI_TYPE_UNSIGNED;
1529       break;
1530 
1531    case TGSI_SEMANTIC_THREAD_ID:
1532       res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
1533       atype = TGSI_TYPE_UNSIGNED;
1534       break;
1535 
1536    case TGSI_SEMANTIC_BLOCK_ID:
1537       res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
1538       atype = TGSI_TYPE_UNSIGNED;
1539       break;
1540 
1541    case TGSI_SEMANTIC_GRID_SIZE:
1542       res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
1543       atype = TGSI_TYPE_UNSIGNED;
1544       break;
1545 
1546    case TGSI_SEMANTIC_TESSCOORD:
1547       {
1548          LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
1549          LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
1550          res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
1551       }
1552       atype = TGSI_TYPE_FLOAT;
1553       break;
1554 
1555    case TGSI_SEMANTIC_FACE:
1556       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1557       atype = TGSI_TYPE_UNSIGNED;
1558       break;
1559 
1560    case TGSI_SEMANTIC_DRAWID:
1561       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1562       atype = TGSI_TYPE_UNSIGNED;
1563       break;
1564 
1565    case TGSI_SEMANTIC_TESSOUTER:
1566       res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1567                                        bld->system_values.tess_outer,
1568                                        lp_build_const_int32(gallivm, swizzle_in));
1569       atype = TGSI_TYPE_FLOAT;
1570       break;
1571 
1572    case TGSI_SEMANTIC_TESSINNER:
1573       res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1574                                        bld->system_values.tess_inner,
1575                                        lp_build_const_int32(gallivm, swizzle_in));
1576       atype = TGSI_TYPE_FLOAT;
1577       break;
1578 
1579    case TGSI_SEMANTIC_VERTICESIN:
1580       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
1581       atype = TGSI_TYPE_UNSIGNED;
1582       break;
1583 
1584    default:
1585       assert(!"unexpected semantic in emit_fetch_system_value");
1586       res = bld_base->base.zero;
1587       atype = TGSI_TYPE_FLOAT;
1588       break;
1589    }
1590 
1591    if (atype != stype) {
1592       if (stype == TGSI_TYPE_FLOAT) {
1593          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1594       } else if (stype == TGSI_TYPE_UNSIGNED) {
1595          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1596       } else if (stype == TGSI_TYPE_SIGNED) {
1597          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1598       }
1599    }
1600 
1601    return res;
1602 }
1603 
1604 /**
1605  * Register fetch with derivatives.
1606  */
1607 static void
1608 emit_fetch_deriv(
1609    struct lp_build_tgsi_soa_context *bld,
1610    LLVMValueRef src,
1611    LLVMValueRef *res,
1612    LLVMValueRef *ddx,
1613    LLVMValueRef *ddy)
1614 {
1615    if (res)
1616       *res = src;
1617 
1618    /* TODO: use interpolation coeffs for inputs */
1619 
1620    if (ddx)
1621       *ddx = lp_build_ddx(&bld->bld_base.base, src);
1622 
1623    if (ddy)
1624       *ddy = lp_build_ddy(&bld->bld_base.base, src);
1625 }
1626 
1627 /**
1628  * Store an array of vec-length 64-bit values into two arrays of vec-length floats,
1629  * i.e.
1630  * value is d0, d1, d2, d3 etc.,
1631  * each 64-bit value has high and low pieces x and y,
1632  * so it gets stored into the separate channels as:
1633  * chan_ptr = d0.x, d1.x, d2.x, d3.x
1634  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1635  */
1636 static void
1637 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1638                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1639                       LLVMValueRef value)
1640 {
1641    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1642    struct gallivm_state *gallivm = bld_base->base.gallivm;
1643    LLVMBuilderRef builder = gallivm->builder;
1644    struct lp_build_context *float_bld = &bld_base->base;
1645    unsigned i;
1646    LLVMValueRef temp, temp2;
1647    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1648    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1649 
1650    for (i = 0; i < bld_base->base.type.length; i++) {
1651       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1652       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1653    }
1654 
1655    temp = LLVMBuildShuffleVector(builder, value,
1656                                  LLVMGetUndef(LLVMTypeOf(value)),
1657                                  LLVMConstVector(shuffles,
1658                                                  bld_base->base.type.length),
1659                                  "");
1660    temp2 = LLVMBuildShuffleVector(builder, value,
1661                                   LLVMGetUndef(LLVMTypeOf(value)),
1662                                   LLVMConstVector(shuffles2,
1663                                                   bld_base->base.type.length),
1664                                   "");
1665 
1666    lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1667    lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1668 }
1669 
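/**
 * Register store to the output file.
 *
 * Indirect stores are scattered into the outputs array; direct stores
 * go through the per-channel output pointers, with 64-bit values split
 * across two channels.
 */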
1670 static void
1671 emit_store_output(struct lp_build_tgsi_context *bld_base,
1672                   enum tgsi_opcode_type dtype,
1673                   const struct tgsi_full_dst_register *reg,
1674                   unsigned index,
1675                   unsigned chan_index,
1676                   LLVMValueRef indirect_index,
1677                   LLVMValueRef value)
1678 {
1679    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1680    struct gallivm_state *gallivm = bld_base->base.gallivm;
1681    LLVMBuilderRef builder = gallivm->builder;
1682    struct lp_build_context *float_bld = &bld_base->base;
1683 
1684    /* Outputs are always stored as floats */
1685    value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1686 
1687    if (reg->Register.Indirect) {
1688       LLVMValueRef index_vec;  /* indexes into the output registers */
1689       LLVMValueRef outputs_array;
1690       LLVMTypeRef fptr_type;
1691 
1692       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1693                                           indirect_index,
1694                                           chan_index,
1695                                           TRUE);
1696 
1697       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1698       outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1699 
1700       /* Scatter store values into output registers */
1701       emit_mask_scatter(bld, outputs_array, index_vec, value,
1702                         &bld->exec_mask);
1703    }
1704    else {
1705       assert(LLVMTypeOf(value) == float_bld->vec_type);
1706       LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1707                                                 chan_index);
1708 
1709       if (tgsi_type_is_64bit(dtype)) {
1710          LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1711                                                    chan_index + 1);
1712          emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1713                                  value);
1714       } else
1715          lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1716    }
1717 }
1718 
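/**
 * Register store for TCS outputs, routed through the
 * tcs_iface->emit_store_output callback.
 */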
1719 static void
1720 emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
1721                       enum tgsi_opcode_type dtype,
1722                       const struct tgsi_full_dst_register *reg,
1723                       unsigned index,
1724                       unsigned chan_index,
1725                       LLVMValueRef indirect_index,
1726                       LLVMValueRef value)
1727 {
1728    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1729    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1730    const struct tgsi_shader_info *info = bld->bld_base.info;
1731    LLVMValueRef attrib_index = NULL;
1732    LLVMValueRef vertex_index = NULL;
1733    LLVMValueRef channel_index = NULL;
1734 
1735    if (reg->Register.Indirect) {
1736       /*
1737        * XXX: this is possibly not quite the right value, since file_max may be
1738        * larger than the max attrib index, due to it being the max of declared
1739        * inputs AND the max vertices per prim (which is 6 for tri adj).
1740        * It should however be safe to use (since we always allocate
1741        * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1742        */
1743       int index_limit = info->file_max[reg->Register.File];
1744       attrib_index = get_indirect_index(bld,
1745                                         reg->Register.File,
1746                                         reg->Register.Index,
1747                                         &reg->Indirect,
1748                                         index_limit);
1749    } else {
1750       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1751    }
1752 
1753    if (reg->Dimension.Indirect) {
1754       vertex_index = get_indirect_index(bld,
1755                                         reg->Register.File,
1756                                         reg->Dimension.Index,
1757                                         &reg->DimIndirect,
1758                                         PIPE_MAX_SHADER_OUTPUTS);
1759    } else {
1760       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1761    }
1762 
1763    channel_index = lp_build_const_int32(gallivm, chan_index);
1764 
1765    assert(bld->tcs_iface->emit_store_output);
1766    bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1767                                           bld_base->info->output_semantic_name[reg->Register.Index],
1768                                           reg->Dimension.Indirect,
1769                                           vertex_index,
1770                                           reg->Register.Indirect,
1771                                           attrib_index,
1772                                           false,
1773                                           channel_index,
1774                                           value,
1775                                           mask_vec(bld_base));
1776 }
1777 
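/**
 * Register store to the temporary file.
 *
 * Mirrors emit_store_output: indirect stores scatter into the temps
 * array, direct stores use the per-channel temp pointers.
 */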
1778 static void
1779 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1780                   enum tgsi_opcode_type dtype,
1781                   const struct tgsi_full_dst_register *reg,
1782                   unsigned index,
1783                   unsigned chan_index,
1784                   LLVMValueRef indirect_index,
1785                   LLVMValueRef value)
1786 {
1787    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1788    struct gallivm_state *gallivm = bld_base->base.gallivm;
1789    LLVMBuilderRef builder = gallivm->builder;
1790    struct lp_build_context *float_bld = &bld_base->base;
1791 
1792    /* Temporaries are always stored as floats */
1793    if (!tgsi_type_is_64bit(dtype))
1794       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1795    else
1796       value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1797 
1798    if (reg->Register.Indirect) {
1799       LLVMValueRef index_vec;  /* indexes into the temp registers */
1800       LLVMValueRef temps_array;
1801       LLVMTypeRef fptr_type;
1802 
1803       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1804                                           indirect_index,
1805                                           chan_index,
1806                                           TRUE);
1807 
1808       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1809       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1810 
1811       /* Scatter store values into temp registers */
1812       emit_mask_scatter(bld, temps_array, index_vec, value,
1813                         &bld->exec_mask);
1814    }
1815    else {
1816       LLVMValueRef temp_ptr;
1817       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1818 
1819       if (tgsi_type_is_64bit(dtype)) {
1820          LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1821                                                       reg->Register.Index,
1822                                                       chan_index + 1);
1823          emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1824                                  value);
1825       }
1826       else
1827          lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1828    }
1829 }
1830 
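/**
 * Register store to the address file (always signed integers).
 */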
1831 static void
1832 emit_store_address(struct lp_build_tgsi_context *bld_base,
1833                    enum tgsi_opcode_type dtype,
1834                    const struct tgsi_full_dst_register *reg,
1835                    unsigned index,
1836                    unsigned chan_index,
1837                    LLVMValueRef indirect_index,
1838                    LLVMValueRef value)
1839 {
1840    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1841    struct gallivm_state *gallivm = bld_base->base.gallivm;
1842    LLVMBuilderRef builder = gallivm->builder;
1843    struct lp_build_context *int_bld = &bld_base->int_bld;
1844 
1845    assert(dtype == TGSI_TYPE_SIGNED);
1846    assert(LLVMTypeOf(value) == int_bld->vec_type);
1847    value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1848    lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1849                         bld->addr[reg->Register.Index][chan_index]);
1850 }
1851 
1852 /**
1853  * Register store.
1854  */
1855 static void
1856 emit_store_chan(
1857    struct lp_build_tgsi_context *bld_base,
1858    const struct tgsi_full_instruction *inst,
1859    unsigned index,
1860    unsigned chan_index,
1861    LLVMValueRef value)
1862 {
1863    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1864    struct gallivm_state *gallivm = bld_base->base.gallivm;
1865    LLVMBuilderRef builder = gallivm->builder;
1866    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1867    struct lp_build_context *float_bld = &bld_base->base;
1868    LLVMValueRef indirect_index = NULL;
1869    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1870 
1871    /*
1872     * Apply saturation.
1873     *
1874     * It is always assumed to be float.
1875     */
1876    if (inst->Instruction.Saturate) {
1877       assert(dtype == TGSI_TYPE_FLOAT ||
1878              dtype == TGSI_TYPE_UNTYPED);
1879       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1880       value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1881    }
1882 
1883    if (reg->Register.Indirect) {
1884       /*
1885        * Currently the mesa/st doesn't generate indirect stores
1886        * to 64-bit values, it normally uses MOV to do indirect stores.
1887        */
1888       assert(!tgsi_type_is_64bit(dtype));
1889       indirect_index = get_indirect_index(bld,
1890                                           reg->Register.File,
1891                                           reg->Register.Index,
1892                                           &reg->Indirect,
1893                                           bld->bld_base.info->file_max[reg->Register.File]);
1894    } else {
1895       assert(reg->Register.Index <=
1896                              bld_base->info->file_max[reg->Register.File]);
1897    }
1898 
1899    if (DEBUG_EXECUTION) {
1900       emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1901    }
1902 
1903    assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1904    bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1905                                                       dtype,
1906                                                       reg,
1907                                                       index,
1908                                                       chan_index,
1909                                                       indirect_index,
1910                                                       value);
1911 
1912    (void)dtype;
1913 }
1914 
1915 /*
1916  * Called at the beginning of the translation of each TGSI instruction, to
1917  * emit some debug code.
1918  */
1919 static void
1920 emit_debug(
1921    struct lp_build_tgsi_context * bld_base,
1922    const struct tgsi_full_instruction * inst,
1923    const struct tgsi_opcode_info * info)
1924 
1925 {
1926    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1927 
1928    if (DEBUG_EXECUTION) {
1929       /*
1930        * Dump the TGSI instruction.
1931        */
1932 
1933       struct gallivm_state *gallivm = bld_base->base.gallivm;
1934       char buf[512];
1935       buf[0] = '$';
1936       buf[1] = ' ';
1937       tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1938       lp_build_printf(gallivm, buf);
1939 
1940       /* Dump the execution mask.
1941        */
1942       if (bld->exec_mask.has_mask) {
1943          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
1944       }
1945    }
1946 }
1947 
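/**
 * Store an instruction's results, one enabled writemask channel at a
 * time (64-bit types only use the even channels).
 */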
1948 static void
1949 emit_store(
1950    struct lp_build_tgsi_context * bld_base,
1951    const struct tgsi_full_instruction * inst,
1952    const struct tgsi_opcode_info * info,
1953    unsigned index,
1954    LLVMValueRef dst[4])
1955 
1956 {
1957    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1958 
1959    unsigned writemask = inst->Dst[index].Register.WriteMask;
1960    while (writemask) {
1961       unsigned chan_index = u_bit_scan(&writemask);
1962       if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1963           continue;
1964       emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1965    }
1966 }
1967 
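/**
 * Translate a TGSI texture target to the corresponding pipe texture target.
 */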
1968 static unsigned
1969 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1970 {
1971    switch (tgsi_target) {
1972    case TGSI_TEXTURE_BUFFER:
1973       return PIPE_BUFFER;
1974    case TGSI_TEXTURE_1D:
1975    case TGSI_TEXTURE_SHADOW1D:
1976       return PIPE_TEXTURE_1D;
1977    case TGSI_TEXTURE_2D:
1978    case TGSI_TEXTURE_SHADOW2D:
1979    case TGSI_TEXTURE_2D_MSAA:
1980       return PIPE_TEXTURE_2D;
1981    case TGSI_TEXTURE_3D:
1982       return PIPE_TEXTURE_3D;
1983    case TGSI_TEXTURE_CUBE:
1984    case TGSI_TEXTURE_SHADOWCUBE:
1985       return PIPE_TEXTURE_CUBE;
1986    case TGSI_TEXTURE_RECT:
1987    case TGSI_TEXTURE_SHADOWRECT:
1988       return PIPE_TEXTURE_RECT;
1989    case TGSI_TEXTURE_1D_ARRAY:
1990    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1991       return PIPE_TEXTURE_1D_ARRAY;
1992    case TGSI_TEXTURE_2D_ARRAY:
1993    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1994    case TGSI_TEXTURE_2D_ARRAY_MSAA:
1995       return PIPE_TEXTURE_2D_ARRAY;
1996    case TGSI_TEXTURE_CUBE_ARRAY:
1997    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1998       return PIPE_TEXTURE_CUBE_ARRAY;
1999    default:
2000       assert(0);
2001       return PIPE_BUFFER;
2002    }
2003 }
2004 
2005 
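/**
 * Determine how the lod argument of a texture instruction should be
 * treated (scalar, per-quad or per-element), based on where the lod
 * comes from and on the shader stage.
 */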
2006 static enum lp_sampler_lod_property
2007 lp_build_lod_property(
2008    struct lp_build_tgsi_context *bld_base,
2009    const struct tgsi_full_instruction *inst,
2010    unsigned src_op)
2011 {
2012    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2013    enum lp_sampler_lod_property lod_property;
2014 
2015    /*
2016     * Not much we can do here. We could try catching inputs declared
2017     * with constant interpolation, but it's not clear that's worth it - since for
2018     * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
2019     * the coords, it could only work for SAMPLE/TXQ/SVIEWINFO, just
2020     * like the constant/immediate recognition below.
2021     * What would be of more value is recognizing temps holding
2022     * broadcasted scalars, but there's no way we can do that.
2023     * Tried asking llvm but without any success (using LLVMIsConstant
2024     * even though this isn't exactly what we'd need), even as simple as
2025     * IMM[0] UINT32 (0,-1,0,0)
2026     * MOV TEMP[0] IMM[0].yyyy
2027     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2028     * doesn't work.
2029     * This means there's ZERO chance this will ever catch a scalar lod
2030     * with traditional tex opcodes as well as texel fetches, since the lod
2031     * comes from the same reg as coords (except some test shaders using
2032     * constant coords maybe).
2033     * There's at least hope for sample opcodes as well as size queries.
2034     */
2035    if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2036        reg->Register.File == TGSI_FILE_CONSTANT ||
2037        reg->Register.File == TGSI_FILE_IMMEDIATE) {
2038       lod_property = LP_SAMPLER_LOD_SCALAR;
2039    }
2040    else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2041       if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2042          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2043       }
2044       else {
2045          lod_property = LP_SAMPLER_LOD_PER_QUAD;
2046       }
2047    }
2048    else {
2049       /* never use scalar (per-quad) lod, the results are just too wrong. */
2050       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2051    }
2052    return lod_property;
2053 }
2054 
2055 
2056 /**
2057  * High-level instruction translators.
2058  */
2059 
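/**
 * Emit code for traditional TEX-style sampling opcodes: gather coords,
 * lod/bias, derivatives and offsets into lp_sampler_params and hand
 * them to the sampler code generator.
 */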
2060 static void
2061 emit_tex( struct lp_build_tgsi_soa_context *bld,
2062           const struct tgsi_full_instruction *inst,
2063           enum lp_build_tex_modifier modifier,
2064           LLVMValueRef *texel,
2065           unsigned sampler_reg,
2066           enum lp_sampler_op_type sampler_op)
2067 {
2068    unsigned unit = inst->Src[sampler_reg].Register.Index;
2069    LLVMValueRef oow = NULL;
2070    LLVMValueRef lod = NULL;
2071    LLVMValueRef coords[5];
2072    LLVMValueRef offsets[3] = { NULL };
2073    struct lp_derivatives derivs;
2074    struct lp_sampler_params params;
2075    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2076    unsigned num_derivs, num_offsets, i;
2077    unsigned shadow_coord = 0;
2078    unsigned layer_coord = 0;
2079    unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2080 
2081    memset(&params, 0, sizeof(params));
2082 
2083    if (!bld->sampler) {
2084       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2085       for (i = 0; i < 4; i++) {
2086          texel[i] = bld->bld_base.base.undef;
2087       }
2088       return;
2089    }
2090 
2091    switch (inst->Texture.Texture) {
2092    case TGSI_TEXTURE_1D_ARRAY:
2093       layer_coord = 1;
2094       /* fallthrough */
2095    case TGSI_TEXTURE_1D:
2096       num_offsets = 1;
2097       num_derivs = 1;
2098       break;
2099    case TGSI_TEXTURE_2D_ARRAY:
2100       layer_coord = 2;
2101       /* fallthrough */
2102    case TGSI_TEXTURE_2D:
2103    case TGSI_TEXTURE_RECT:
2104       num_offsets = 2;
2105       num_derivs = 2;
2106       break;
2107    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2108       layer_coord = 1;
2109       /* fallthrough */
2110    case TGSI_TEXTURE_SHADOW1D:
2111       shadow_coord = 2;
2112       num_offsets = 1;
2113       num_derivs = 1;
2114       break;
2115    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2116       layer_coord = 2;
2117       shadow_coord = 3;
2118       num_offsets = 2;
2119       num_derivs = 2;
2120       break;
2121    case TGSI_TEXTURE_SHADOW2D:
2122    case TGSI_TEXTURE_SHADOWRECT:
2123       shadow_coord = 2;
2124       num_offsets = 2;
2125       num_derivs = 2;
2126       break;
2127    case TGSI_TEXTURE_CUBE:
2128       num_offsets = 2;
2129       num_derivs = 3;
2130       break;
2131    case TGSI_TEXTURE_3D:
2132       num_offsets = 3;
2133       num_derivs = 3;
2134       break;
2135    case TGSI_TEXTURE_SHADOWCUBE:
2136       shadow_coord = 3;
2137       num_offsets = 2;
2138       num_derivs = 3;
2139       break;
2140    case TGSI_TEXTURE_CUBE_ARRAY:
2141       num_offsets = 2;
2142       num_derivs = 3;
2143       layer_coord = 3;
2144       break;
2145    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2146       num_offsets = 2;
2147       num_derivs = 3;
2148       layer_coord = 3;
2149       shadow_coord = 4; /* shadow coord comes from a separate reg */
2150       break;
2151    case TGSI_TEXTURE_2D_MSAA:
2152    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2153    default:
2154       assert(0);
2155       return;
2156    }
2157 
2158    /* Note lod and especially projected are illegal in a LOT of cases */
2159    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2160        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2161       if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2162          lod = bld->bld_base.base.zero;
2163       } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2164                  inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2165          /* note that shadow cube array with bias/explicit lod does not exist */
2166          lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2167       }
2168       else {
2169          lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2170       }
2171       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2172          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2173       }
2174       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2175          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2176       }
2177       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2178    }
2179 
2180    if (sampler_op == LP_SAMPLER_OP_GATHER) {
2181       uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2182       sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2183    }
2184    if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2185       oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2186       oow = lp_build_rcp(&bld->bld_base.base, oow);
2187    }
2188 
2189    for (i = 0; i < num_derivs; i++) {
2190       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2191       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2192          coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2193    }
2194    for (i = num_derivs; i < 5; i++) {
2195       coords[i] = bld->bld_base.base.undef;
2196    }
2197 
2198    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2199    if (layer_coord) {
2200       if (layer_coord == 3) {
2201          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2202       }
2203       else {
2204          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2205       }
2206       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2207          coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2208    }
2209    /* Shadow coord always occupies the 5th slot. */
2210    if (shadow_coord) {
2211       sample_key |= LP_SAMPLER_SHADOW;
2212       if (shadow_coord == 4) {
2213          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2214       }
2215       else {
2216          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2217       }
2218       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2219          coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2220    }
2221 
2222    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2223       unsigned dim;
2224       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2225       for (dim = 0; dim < num_derivs; ++dim) {
2226          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2227          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2228       }
2229       params.derivs = &derivs;
2230       /*
2231        * could also check whether all src regs are constant, but I doubt such
2232        * cases exist in practice.
2233        */
2234       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2235          if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2236             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2237          }
2238          else {
2239             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2240          }
2241       }
2242       else {
2243          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2244       }
2245    }
2246    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2247 
2248    /* we don't handle the 4 offset version of tg4 */
2249    if (inst->Texture.NumOffsets == 1) {
2250       unsigned dim;
2251       sample_key |= LP_SAMPLER_OFFSETS;
2252       for (dim = 0; dim < num_offsets; dim++) {
2253          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2254       }
2255    }
2256 
2257    params.type = bld->bld_base.base.type;
2258    params.sample_key = sample_key;
2259    params.texture_index = unit;
2260    params.sampler_index = unit;
2261    params.context_ptr = bld->context_ptr;
2262    params.thread_data_ptr = bld->thread_data_ptr;
2263    params.coords = coords;
2264    params.offsets = offsets;
2265    params.lod = lod;
2266    params.texel = texel;
2267 
2268    bld->sampler->emit_tex_sample(bld->sampler,
2269                                  bld->bld_base.base.gallivm,
2270                                  &params);
2271 }
2272 
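/**
 * Emit code for the SAMPLE* opcodes, where the texture and sampler
 * units are selected by separate source registers (src1 and src2).
 */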
2273 static void
2274 emit_sample(struct lp_build_tgsi_soa_context *bld,
2275             const struct tgsi_full_instruction *inst,
2276             enum lp_build_tex_modifier modifier,
2277             boolean compare,
2278             enum lp_sampler_op_type sample_type,
2279             LLVMValueRef *texel)
2280 {
2281    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2282    unsigned texture_unit, sampler_unit;
2283    LLVMValueRef lod = NULL;
2284    LLVMValueRef coords[5];
2285    LLVMValueRef offsets[3] = { NULL };
2286    struct lp_derivatives derivs;
2287    struct lp_sampler_params params;
2288    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2289 
2290    unsigned num_offsets, num_derivs, i;
2291    unsigned layer_coord = 0;
2292    unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2293 
2294    memset(&params, 0, sizeof(params));
2295 
2296    if (!bld->sampler) {
2297       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2298       for (i = 0; i < 4; i++) {
2299          texel[i] = bld->bld_base.base.undef;
2300       }
2301       return;
2302    }
2303 
2304    /*
2305     * unlike old-style tex opcodes the texture/sampler indices
2306     * always come from src1 and src2 respectively.
2307     */
2308    texture_unit = inst->Src[1].Register.Index;
2309    sampler_unit = inst->Src[2].Register.Index;
2310 
2311    /*
2312     * Note inst->Texture.Texture will contain the number of offsets,
2313     * however the target information is NOT there and comes from the
2314     * declared sampler views instead.
2315     */
2316    switch (bld->sv[texture_unit].Resource) {
2317    case TGSI_TEXTURE_1D:
2318       num_offsets = 1;
2319       num_derivs = 1;
2320       break;
2321    case TGSI_TEXTURE_1D_ARRAY:
2322       layer_coord = 1;
2323       num_offsets = 1;
2324       num_derivs = 1;
2325       break;
2326    case TGSI_TEXTURE_2D:
2327    case TGSI_TEXTURE_RECT:
2328       num_offsets = 2;
2329       num_derivs = 2;
2330       break;
2331    case TGSI_TEXTURE_2D_ARRAY:
2332       layer_coord = 2;
2333       num_offsets = 2;
2334       num_derivs = 2;
2335       break;
2336    case TGSI_TEXTURE_CUBE:
2337       num_offsets = 2;
2338       num_derivs = 3;
2339       break;
2340    case TGSI_TEXTURE_3D:
2341       num_offsets = 3;
2342       num_derivs = 3;
2343       break;
2344    case TGSI_TEXTURE_CUBE_ARRAY:
2345       layer_coord = 3;
2346       num_offsets = 2;
2347       num_derivs = 3;
2348       break;
2349    default:
2350       assert(0);
2351       return;
2352    }
2353 
2354    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2355        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2356       lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2357       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2358          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2359       }
2360       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2361          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2362       }
2363       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2364    }
2365    else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2366       /* XXX might be better to explicitly pass the level zero information */
2367       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2368       lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2369    }
2370 
2371    for (i = 0; i < num_derivs; i++) {
2372       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2373    }
2374    for (i = num_derivs; i < 5; i++) {
2375       coords[i] = bld->bld_base.base.undef;
2376    }
2377 
2378    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2379    if (layer_coord) {
2380       if (layer_coord == 3)
2381          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2382       else
2383          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2384    }
2385    /* Shadow coord always occupies the 5th slot. */
2386    if (compare) {
2387       sample_key |= LP_SAMPLER_SHADOW;
2388       coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2389    }
2390 
2391    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2392       unsigned dim;
2393       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2394       for (dim = 0; dim < num_derivs; ++dim) {
2395          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2396          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2397       }
2398       params.derivs = &derivs;
2399       /*
2400        * could also check whether all src regs are constant, but I doubt such
2401        * cases exist in practice.
2402        */
2403       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2404          if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2405             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2406          }
2407          else {
2408             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2409          }
2410       }
2411       else {
2412          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2413       }
2414    }
2415 
2416    /* some advanced gather instructions (txgo) would require 4 offsets */
2417    if (inst->Texture.NumOffsets == 1) {
2418       unsigned dim;
2419       sample_key |= LP_SAMPLER_OFFSETS;
2420       for (dim = 0; dim < num_offsets; dim++) {
2421          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2422       }
2423    }
2424    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2425 
2426    params.type = bld->bld_base.base.type;
2427    params.sample_key = sample_key;
2428    params.texture_index = texture_unit;
2429    params.sampler_index = sampler_unit;
2430    params.context_ptr = bld->context_ptr;
2431    params.thread_data_ptr = bld->thread_data_ptr;
2432    params.coords = coords;
2433    params.offsets = offsets;
2434    params.lod = lod;
2435    params.texel = texel;
2436 
2437    bld->sampler->emit_tex_sample(bld->sampler,
2438                                  bld->bld_base.base.gallivm,
2439                                  &params);
2440 
2441    if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2442        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2443        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2444        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2445       unsigned char swizzles[4];
2446       swizzles[0] = inst->Src[1].Register.SwizzleX;
2447       swizzles[1] = inst->Src[1].Register.SwizzleY;
2448       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2449       swizzles[3] = inst->Src[1].Register.SwizzleW;
2450 
2451       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2452    }
2453 }
2454 
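/**
 * Emit code for texel fetches (TXF and SAMPLE_I), which take integer
 * coords plus an explicit lod, or a sample index for msaa targets.
 */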
2455 static void
2456 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2457                    const struct tgsi_full_instruction *inst,
2458                    LLVMValueRef *texel,
2459                    boolean is_samplei)
2460 {
2461    unsigned unit, target;
2462    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2463    LLVMValueRef explicit_lod = NULL;
2464    LLVMValueRef coords[5];
2465    LLVMValueRef offsets[3] = { NULL };
2466    LLVMValueRef ms_index = NULL;
2467    struct lp_sampler_params params;
2468    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2469    unsigned dims, i;
2470    unsigned layer_coord = 0;
2471    unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2472 
2473    memset(&params, 0, sizeof(params));
2474 
2475    if (!bld->sampler) {
2476       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2477       for (i = 0; i < 4; i++) {
2478          texel[i] = coord_undef;
2479       }
2480       return;
2481    }
2482 
2483    unit = inst->Src[1].Register.Index;
2484 
2485    if (is_samplei) {
2486       target = bld->sv[unit].Resource;
2487    }
2488    else {
2489       target = inst->Texture.Texture;
2490    }
2491 
2492    switch (target) {
2493    case TGSI_TEXTURE_1D:
2494    case TGSI_TEXTURE_BUFFER:
2495       dims = 1;
2496       break;
2497    case TGSI_TEXTURE_1D_ARRAY:
2498       layer_coord = 1;
2499       dims = 1;
2500       break;
2501    case TGSI_TEXTURE_2D:
2502    case TGSI_TEXTURE_RECT:
2503    case TGSI_TEXTURE_2D_MSAA:
2504       dims = 2;
2505       break;
2506    case TGSI_TEXTURE_2D_ARRAY:
2507    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2508       layer_coord = 2;
2509       dims = 2;
2510       break;
2511    case TGSI_TEXTURE_3D:
2512       dims = 3;
2513       break;
2514    default:
2515       assert(0);
2516       return;
2517    }
2518 
2519    /* always have lod except for buffers and msaa targets? */
2520    if (target != TGSI_TEXTURE_BUFFER &&
2521        target != TGSI_TEXTURE_2D_MSAA &&
2522        target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2523        inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2524       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2525       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2526       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2527    }
2528 
2529    if (target == TGSI_TEXTURE_2D_MSAA ||
2530        target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2531       sample_key |= LP_SAMPLER_FETCH_MS;
2532       ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2533    }
2534 
2535    /*
2536     * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2537     * would be the sample index.
2538     */
2539 
2540    for (i = 0; i < dims; i++) {
2541       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2542    }
2543    /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2544    for (i = dims; i < 5; i++) {
2545       coords[i] = coord_undef;
2546    }
2547    if (layer_coord)
2548       coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2549 
2550    if (inst->Texture.NumOffsets == 1) {
2551       unsigned dim;
2552       sample_key |= LP_SAMPLER_OFFSETS;
2553       for (dim = 0; dim < dims; dim++) {
2554          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2555       }
2556    }
2557    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2558 
2559    params.type = bld->bld_base.base.type;
2560    params.sample_key = sample_key;
2561    params.texture_index = unit;
2562    /*
2563     * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2564     * and trigger some assertions with d3d10 where the sampler view number
2565     * can exceed this.
2566     */
2567    params.sampler_index = 0;
2568    params.context_ptr = bld->context_ptr;
2569    params.thread_data_ptr = bld->thread_data_ptr;
2570    params.coords = coords;
2571    params.offsets = offsets;
2572    params.derivs = NULL;
2573    params.lod = explicit_lod;
2574    params.texel = texel;
2575    params.ms_index = ms_index;
2576 
2577    bld->sampler->emit_tex_sample(bld->sampler,
2578                                  bld->bld_base.base.gallivm,
2579                                  &params);
2580 
2581    if (is_samplei &&
2582        (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2583         inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2584         inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2585         inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2586       unsigned char swizzles[4];
2587       swizzles[0] = inst->Src[1].Register.SwizzleX;
2588       swizzles[1] = inst->Src[1].Register.SwizzleY;
2589       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2590       swizzles[3] = inst->Src[1].Register.SwizzleW;
2591 
2592       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2593    }
2594 }
2595 
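/**
 * Emit code for texture size queries (TXQ and SVIEWINFO).
 */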
2596 static void
2597 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2598                  const struct tgsi_full_instruction *inst,
2599                  LLVMValueRef *sizes_out,
2600                  boolean is_sviewinfo)
2601 {
2602    LLVMValueRef explicit_lod;
2603    enum lp_sampler_lod_property lod_property;
2604    unsigned has_lod;
2605    unsigned i;
2606    unsigned unit = inst->Src[1].Register.Index;
2607    unsigned target, pipe_target;
2608    struct lp_sampler_size_query_params params;
2609 
2610    if (is_sviewinfo) {
2611       target = bld->sv[unit].Resource;
2612    }
2613    else {
2614       target = inst->Texture.Texture;
2615    }
2616    switch (target) {
2617    case TGSI_TEXTURE_BUFFER:
2618    case TGSI_TEXTURE_RECT:
2619    case TGSI_TEXTURE_SHADOWRECT:
2620       has_lod = 0;
2621       break;
2622    default:
2623       has_lod = 1;
2624       break;
2625    }
2626 
2627    if (!bld->sampler) {
2628       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2629       for (i = 0; i < 4; i++)
2630          sizes_out[i] = bld->bld_base.int_bld.undef;
2631       return;
2632    }
2633 
2634    if (has_lod) {
2635       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2636       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2637    }
2638    else {
2639       explicit_lod = NULL;
2640       lod_property = LP_SAMPLER_LOD_SCALAR;
2641    }
2642 
2643 
2644    pipe_target = tgsi_to_pipe_tex_target(target);
2645 
2646    params.int_type = bld->bld_base.int_bld.type;
2647    params.texture_unit = unit;
2648    params.target = pipe_target;
2649    params.context_ptr = bld->context_ptr;
2650    params.is_sviewinfo = TRUE;
2651    params.lod_property = lod_property;
2652    params.explicit_lod = explicit_lod;
2653    params.sizes_out = sizes_out;
2654    params.samples_only = false;
2655 
2656    bld->sampler->emit_size_query(bld->sampler,
2657                                  bld->bld_base.base.gallivm,
2658                                  &params);
2659 }
2660 
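/**
 * Return TRUE if the shader is about to end: look ahead at most five
 * instructions and return FALSE if any of them is a texture fetch,
 * texture query or control-flow instruction.  Used below to skip the
 * mask check after KILL/KILL_IF when it would not save any work.
 */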
2661 static boolean
2662 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2663                    int pc)
2664 {
2665    unsigned i;
2666 
2667    for (i = 0; i < 5; i++) {
2668       enum tgsi_opcode opcode;
2669 
2670       if (pc + i >= bld->bld_base.info->num_instructions)
2671          return TRUE;
2672 
2673       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2674 
2675       if (opcode == TGSI_OPCODE_END)
2676          return TRUE;
2677 
2678       if (opcode == TGSI_OPCODE_TEX ||
2679          opcode == TGSI_OPCODE_TXP ||
2680          opcode == TGSI_OPCODE_TXD ||
2681          opcode == TGSI_OPCODE_TXB ||
2682          opcode == TGSI_OPCODE_TXL ||
2683          opcode == TGSI_OPCODE_TXF ||
2684          opcode == TGSI_OPCODE_TXQ ||
2685          opcode == TGSI_OPCODE_TEX2 ||
2686          opcode == TGSI_OPCODE_TXB2 ||
2687          opcode == TGSI_OPCODE_TXL2 ||
2688          opcode == TGSI_OPCODE_SAMPLE ||
2689          opcode == TGSI_OPCODE_SAMPLE_B ||
2690          opcode == TGSI_OPCODE_SAMPLE_C ||
2691          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2692          opcode == TGSI_OPCODE_SAMPLE_D ||
2693          opcode == TGSI_OPCODE_SAMPLE_I ||
2694          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2695          opcode == TGSI_OPCODE_SAMPLE_L ||
2696          opcode == TGSI_OPCODE_SVIEWINFO ||
2697          opcode == TGSI_OPCODE_CAL ||
2698          opcode == TGSI_OPCODE_IF ||
2699          opcode == TGSI_OPCODE_UIF ||
2700          opcode == TGSI_OPCODE_BGNLOOP ||
2701          opcode == TGSI_OPCODE_SWITCH)
2702          return FALSE;
2703    }
2704 
2705    return TRUE;
2706 }
2707 
2708 
2709 
2710 /**
2711  * Kill fragment if any of the src register values are negative.
2712  */
2713 static void
2714 emit_kill_if(
2715    struct lp_build_tgsi_soa_context *bld,
2716    const struct tgsi_full_instruction *inst,
2717    int pc)
2718 {
2719    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2720    const struct tgsi_full_src_register *reg = &inst->Src[0];
2721    LLVMValueRef terms[TGSI_NUM_CHANNELS];
2722    LLVMValueRef mask;
2723    unsigned chan_index;
2724 
2725    memset(&terms, 0, sizeof terms);
2726 
2727    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2728       unsigned swizzle;
2729 
2730       /* Unswizzle channel */
2731       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2732 
2733       /* Check if the component has not been already tested. */
2734       assert(swizzle < TGSI_NUM_CHANNELS);
2735       if( !terms[swizzle] )
2736          /* TODO: change the comparison operator instead of setting the sign */
2737          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2738    }
2739 
2740    mask = NULL;
2741    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2742       if(terms[chan_index]) {
2743          LLVMValueRef chan_mask;
2744 
2745          /*
2746           * If term < 0 then mask = 0 else mask = ~0.
2747           */
2748          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2749 
2750          if(mask)
2751             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2752          else
2753             mask = chan_mask;
2754       }
2755    }
2756 
2757    if (bld->exec_mask.has_mask) {
2758       LLVMValueRef invmask;
2759       invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2760       mask = LLVMBuildOr(builder, mask, invmask, "");
2761    }
2762 
2763    lp_build_mask_update(bld->mask, mask);
2764    if (!near_end_of_shader(bld, pc))
2765       lp_build_mask_check(bld->mask);
2766 }
2767 
2768 
2769 /**
2770  * Unconditional fragment kill.
2771  * The only predication is the execution mask which will apply if
2772  * we're inside a loop or conditional.
2773  */
2774 static void
2775 emit_kill(struct lp_build_tgsi_soa_context *bld,
2776           int pc)
2777 {
2778    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2779    LLVMValueRef mask;
2780 
2781    /* For those channels which are "alive", disable fragment shader
2782     * execution.
2783     */
2784    if (bld->exec_mask.has_mask) {
2785       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2786    }
2787    else {
2788       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2789       mask = zero;
2790    }
2791 
2792    lp_build_mask_update(bld->mask, mask);
2793 
2794    if (!near_end_of_shader(bld, pc))
2795       lp_build_mask_check(bld->mask);
2796 }
2797 
2798 
2799 /**
2800  * Emit code which will dump the values of the registers in the given
2801  * file (inputs, constants, temporaries or outputs) to stdout.
2802  */
2803 static void
2804 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2805                unsigned file)
2806 {
2807    const struct tgsi_shader_info *info = bld->bld_base.info;
2808    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2809    LLVMBuilderRef builder = gallivm->builder;
2810    LLVMValueRef reg_ptr;
2811    int index;
2812    int max_index = info->file_max[file];
2813 
2814    /*
2815     * Some register files, particularly constants, can be very large,
2816     * and dumping everything could make this unusably slow.
2817     */
2818    max_index = MIN2(max_index, 32);
2819 
2820    for (index = 0; index <= max_index; index++) {
2821       LLVMValueRef res;
2822       unsigned mask;
2823       int chan;
2824 
2825       if (index < 8 * sizeof(unsigned) &&
2826           (info->file_mask[file] & (1u << index)) == 0)  {
2827          /* This was not declared. */
2828          continue;
2829       }
2830 
2831       if (file == TGSI_FILE_INPUT) {
2832          mask = info->input_usage_mask[index];
2833       } else {
2834          mask = TGSI_WRITEMASK_XYZW;
2835       }
2836 
2837       for (chan = 0; chan < 4; chan++) {
2838          if ((mask & (1 << chan)) == 0) {
2839             /* This channel is not used. */
2840             continue;
2841          }
2842 
2843          if (file == TGSI_FILE_CONSTANT) {
2844             struct tgsi_full_src_register reg;
2845             memset(&reg, 0, sizeof reg);
2846             reg.Register.File = file;
2847             reg.Register.Index = index;
2848             reg.Register.SwizzleX = 0;
2849             reg.Register.SwizzleY = 1;
2850             reg.Register.SwizzleZ = 2;
2851             reg.Register.SwizzleW = 3;
2852 
2853             res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2854             if (!res) {
2855                continue;
2856             }
2857          } else if (file == TGSI_FILE_INPUT) {
2858             res = bld->inputs[index][chan];
2859             if (!res) {
2860                continue;
2861             }
2862          } else if (file == TGSI_FILE_TEMPORARY) {
2863             reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2864             assert(reg_ptr);
2865             res = LLVMBuildLoad(builder, reg_ptr, "");
2866          } else if (file == TGSI_FILE_OUTPUT) {
2867             reg_ptr = lp_get_output_ptr(bld, index, chan);
2868             assert(reg_ptr);
2869             res = LLVMBuildLoad(builder, reg_ptr, "");
2870          } else {
2871             assert(0);
2872             continue;
2873          }
2874 
2875          emit_dump_reg(gallivm, file, index, chan, res);
2876       }
2877    }
2878 }
2879 
2880 
2881 
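/**
 * Handle a TGSI declaration: allocate storage for temporaries, outputs
 * and address registers (unless the file is indirectly addressed and
 * lives in an array instead), and record sampler view, constant buffer
 * and shader buffer information for later use.
 */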
2882 void
2883 lp_emit_declaration_soa(
2884    struct lp_build_tgsi_context *bld_base,
2885    const struct tgsi_full_declaration *decl)
2886 {
2887    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2888    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2889    LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2890    const unsigned first = decl->Range.First;
2891    const unsigned last = decl->Range.Last;
2892    unsigned idx, i;
2893 
2894    assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2895 
2896    switch (decl->Declaration.File) {
2897    case TGSI_FILE_TEMPORARY:
2898       if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2899          assert(last < LP_MAX_INLINED_TEMPS);
2900          for (idx = first; idx <= last; ++idx) {
2901             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2902                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2903          }
2904       }
2905       break;
2906 
2907    case TGSI_FILE_OUTPUT:
2908       if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2909          for (idx = first; idx <= last; ++idx) {
2910             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2911                bld->outputs[idx][i] = lp_build_alloca(gallivm,
2912                                                       vec_type, "output");
2913          }
2914       }
2915       break;
2916 
2917    case TGSI_FILE_ADDRESS:
2918       /* ADDR registers are only allocated with an integer LLVM IR type,
2919        * as they are guaranteed to always hold integers.
2920        * XXX: Not sure if this exception is worthwhile (or the whole idea of
2921        * an ADDR register for that matter).
2922        */
2923       assert(last < LP_MAX_TGSI_ADDRS);
2924       for (idx = first; idx <= last; ++idx) {
2925          assert(idx < LP_MAX_TGSI_ADDRS);
2926          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2927             bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2928       }
2929       break;
2930 
2931    case TGSI_FILE_SAMPLER_VIEW:
2932       /*
2933        * The target stored here MUST match whatever is actually bound in
2934        * the sampler views (what about the return type?).
2935        */
2936       assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2937       for (idx = first; idx <= last; ++idx) {
2938          bld->sv[idx] = decl->SamplerView;
2939       }
2940       break;
2941 
2942    case TGSI_FILE_CONSTANT:
2943    {
2944       /*
2945        * We could trivially fetch the per-buffer pointer when fetching the
2946        * constant, relying on llvm to figure out it's always the same pointer
2947        * anyway. However, doing so results in a huge (more than a factor of 10)
2948        * slowdown in llvm compilation times for some (but not all) shaders
2949        * (more specifically, the IR optimization spends way more time in
2950        * DominatorTree::dominates), at least with llvm versions 3.1 and 3.3.
2951        */
2952       unsigned idx2D = decl->Dim.Index2D;
2953       LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2954       assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2955       bld->consts[idx2D] =
2956          lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2957       bld->consts_sizes[idx2D] =
2958          lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2959    }
2960    break;
2961    case TGSI_FILE_BUFFER:
2962    {
2963       unsigned idx = decl->Range.First;
2964       LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2965       assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2966       bld->ssbos[idx] =
2967          lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2968       bld->ssbo_sizes[idx] =
2969          lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2970 
2971    }
2972    break;
2973    case TGSI_FILE_MEMORY:
2974       break;
2975    default:
2976       /* don't need to declare other vars */
2977       break;
2978    }
2979 }
2980 
2981 
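/**
 * Handle a TGSI immediate: build a constant vector per component
 * (bitcast to the float vector type for integer/double immediates) and
 * store it either in the inline immediates[] array or, when immediates
 * are indirectly addressed, in the imms_array alloca.
 */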
2982 void lp_emit_immediate_soa(
2983    struct lp_build_tgsi_context *bld_base,
2984    const struct tgsi_full_immediate *imm)
2985 {
2986    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2987    struct gallivm_state * gallivm = bld_base->base.gallivm;
2988    LLVMValueRef imms[4];
2989    unsigned i;
2990    const uint size = imm->Immediate.NrTokens - 1;
2991    assert(size <= 4);
2992    switch (imm->Immediate.DataType) {
2993    case TGSI_IMM_FLOAT32:
2994       for( i = 0; i < size; ++i )
2995          imms[i] =
2996                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2997 
2998       break;
2999    case TGSI_IMM_FLOAT64:
3000    case TGSI_IMM_UINT64:
3001    case TGSI_IMM_INT64:
3002    case TGSI_IMM_UINT32:
3003       for( i = 0; i < size; ++i ) {
3004          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3005          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3006       }
3007 
3008       break;
3009    case TGSI_IMM_INT32:
3010       for( i = 0; i < size; ++i ) {
3011          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3012          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3013       }
3014 
3015       break;
3016    }
3017    for( i = size; i < 4; ++i )
3018       imms[i] = bld_base->base.undef;
3019 
3020    if (bld->use_immediates_array) {
3021       unsigned index = bld->num_immediates;
3022       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3023       LLVMBuilderRef builder = gallivm->builder;
3024       LLVMValueRef gep[2];
3025       gep[0] = lp_build_const_int32(gallivm, 0);
3026 
3027       assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3028       for (i = 0; i < 4; ++i ) {
3029          gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3030          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3031                                              bld->imms_array, gep, 2, "");
3032          LLVMBuildStore(builder, imms[i], imm_ptr);
3033       }
3034    } else {
3035       /* simply copy the immediate values into the next immediates[] slot */
3036       unsigned i;
3037       assert(imm->Immediate.NrTokens - 1 <= 4);
3038       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3039 
3040       for(i = 0; i < 4; ++i )
3041          bld->immediates[bld->num_immediates][i] = imms[i];
3042 
3043       if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3044          unsigned index = bld->num_immediates;
3045          struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3046          LLVMBuilderRef builder = gallivm->builder;
3047          LLVMValueRef gep[2];
3048          gep[0] = lp_build_const_int32(gallivm, 0);
3049          for (i = 0; i < 4; ++i ) {
3050             gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3051             LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3052                                                 bld->imms_array, gep, 2, "");
3053             LLVMBuildStore(builder,
3054                            bld->immediates[index][i],
3055                            imm_ptr);
3056          }
3057       }
3058    }
3059 
3060    bld->num_immediates++;
3061 }
3062 
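/*
 * DDX/DDY handlers: approximate the screen-space partial derivatives of
 * src0 via per-quad differences, as computed by emit_fetch_deriv().
 */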
3063 static void
3064 ddx_emit(
3065    const struct lp_build_tgsi_action * action,
3066    struct lp_build_tgsi_context * bld_base,
3067    struct lp_build_emit_data * emit_data)
3068 {
3069    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3070 
3071    emit_fetch_deriv(bld, emit_data->args[0], NULL,
3072                     &emit_data->output[emit_data->chan], NULL);
3073 }
3074 
3075 static void
3076 ddy_emit(
3077    const struct lp_build_tgsi_action * action,
3078    struct lp_build_tgsi_context * bld_base,
3079    struct lp_build_emit_data * emit_data)
3080 {
3081    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3082 
3083    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3084                     &emit_data->output[emit_data->chan]);
3085 }
3086 
3087 static void
3088 kill_emit(
3089    const struct lp_build_tgsi_action * action,
3090    struct lp_build_tgsi_context * bld_base,
3091    struct lp_build_emit_data * emit_data)
3092 {
3093    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3094 
3095    emit_kill(bld, bld_base->pc - 1);
3096 }
3097 
3098 static void
3099 kill_if_emit(
3100    const struct lp_build_tgsi_action * action,
3101    struct lp_build_tgsi_context * bld_base,
3102    struct lp_build_emit_data * emit_data)
3103 {
3104    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3105 
3106    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3107 }
3108 
3109 static void
3110 tex_emit(
3111    const struct lp_build_tgsi_action * action,
3112    struct lp_build_tgsi_context * bld_base,
3113    struct lp_build_emit_data * emit_data)
3114 {
3115    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3116 
3117    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3118             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3119 }
3120 
3121 static void
3122 tex2_emit(
3123    const struct lp_build_tgsi_action * action,
3124    struct lp_build_tgsi_context * bld_base,
3125    struct lp_build_emit_data * emit_data)
3126 {
3127    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3128 
3129    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3130             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3131 }
3132 
3133 static void
3134 txb_emit(
3135    const struct lp_build_tgsi_action * action,
3136    struct lp_build_tgsi_context * bld_base,
3137    struct lp_build_emit_data * emit_data)
3138 {
3139    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3140 
3141    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3142             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3143 }
3144 
3145 static void
3146 txb2_emit(
3147    const struct lp_build_tgsi_action * action,
3148    struct lp_build_tgsi_context * bld_base,
3149    struct lp_build_emit_data * emit_data)
3150 {
3151    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3152 
3153    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3154             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3155 }
3156 
3157 static void
3158 txd_emit(
3159    const struct lp_build_tgsi_action * action,
3160    struct lp_build_tgsi_context * bld_base,
3161    struct lp_build_emit_data * emit_data)
3162 {
3163    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3164 
3165    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3166             emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3167 }
3168 
3169 static void
3170 txl_emit(
3171    const struct lp_build_tgsi_action * action,
3172    struct lp_build_tgsi_context * bld_base,
3173    struct lp_build_emit_data * emit_data)
3174 {
3175    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3176 
3177    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3178             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3179 }
3180 
3181 static void
3182 txl2_emit(
3183    const struct lp_build_tgsi_action * action,
3184    struct lp_build_tgsi_context * bld_base,
3185    struct lp_build_emit_data * emit_data)
3186 {
3187    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3188 
3189    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3190             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3191 }
3192 
3193 static void
3194 txp_emit(
3195    const struct lp_build_tgsi_action * action,
3196    struct lp_build_tgsi_context * bld_base,
3197    struct lp_build_emit_data * emit_data)
3198 {
3199    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3200 
3201    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3202             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3203 }
3204 
3205 static void
3206 tg4_emit(
3207    const struct lp_build_tgsi_action * action,
3208    struct lp_build_tgsi_context * bld_base,
3209    struct lp_build_emit_data * emit_data)
3210 {
3211    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3212 
3213    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3214             emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3215 }
3216 
3217 static void
3218 lodq_emit(
3219    const struct lp_build_tgsi_action * action,
3220    struct lp_build_tgsi_context * bld_base,
3221    struct lp_build_emit_data * emit_data)
3222 {
3223    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3224 
3225    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3226             emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3227 }
3228 
3229 static void
3230 txq_emit(
3231    const struct lp_build_tgsi_action * action,
3232    struct lp_build_tgsi_context * bld_base,
3233    struct lp_build_emit_data * emit_data)
3234 {
3235    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3236 
3237    emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3238 }
3239 
3240 static void
3241 txf_emit(
3242    const struct lp_build_tgsi_action * action,
3243    struct lp_build_tgsi_context * bld_base,
3244    struct lp_build_emit_data * emit_data)
3245 {
3246    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3247 
3248    emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3249 }
3250 
3251 static void
3252 sample_i_emit(
3253    const struct lp_build_tgsi_action * action,
3254    struct lp_build_tgsi_context * bld_base,
3255    struct lp_build_emit_data * emit_data)
3256 {
3257    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3258 
3259    emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3260 }
3261 
3262 static void
3263 sample_emit(
3264    const struct lp_build_tgsi_action * action,
3265    struct lp_build_tgsi_context * bld_base,
3266    struct lp_build_emit_data * emit_data)
3267 {
3268    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3269 
3270    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3271                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3272 }
3273 
3274 static void
3275 sample_b_emit(
3276    const struct lp_build_tgsi_action * action,
3277    struct lp_build_tgsi_context * bld_base,
3278    struct lp_build_emit_data * emit_data)
3279 {
3280    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3281 
3282    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3283                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3284 }
3285 
3286 static void
3287 sample_c_emit(
3288    const struct lp_build_tgsi_action * action,
3289    struct lp_build_tgsi_context * bld_base,
3290    struct lp_build_emit_data * emit_data)
3291 {
3292    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3293 
3294    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3295                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3296 }
3297 
3298 static void
3299 sample_c_lz_emit(
3300    const struct lp_build_tgsi_action * action,
3301    struct lp_build_tgsi_context * bld_base,
3302    struct lp_build_emit_data * emit_data)
3303 {
3304    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3305 
3306    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3307                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3308 }
3309 
3310 static void
3311 sample_d_emit(
3312    const struct lp_build_tgsi_action * action,
3313    struct lp_build_tgsi_context * bld_base,
3314    struct lp_build_emit_data * emit_data)
3315 {
3316    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3317 
3318    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3319                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3320 }
3321 
3322 static void
3323 sample_l_emit(
3324    const struct lp_build_tgsi_action * action,
3325    struct lp_build_tgsi_context * bld_base,
3326    struct lp_build_emit_data * emit_data)
3327 {
3328    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3329 
3330    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3331                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3332 }
3333 
3334 static void
3335 gather4_emit(
3336    const struct lp_build_tgsi_action * action,
3337    struct lp_build_tgsi_context * bld_base,
3338    struct lp_build_emit_data * emit_data)
3339 {
3340    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3341 
3342    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3343                FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3344 }
3345 
3346 static void
3347 sviewinfo_emit(
3348    const struct lp_build_tgsi_action * action,
3349    struct lp_build_tgsi_context * bld_base,
3350    struct lp_build_emit_data * emit_data)
3351 {
3352    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3353 
3354    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3355 }
3356 
3357 static void
3358 lod_emit(
3359    const struct lp_build_tgsi_action * action,
3360    struct lp_build_tgsi_context * bld_base,
3361    struct lp_build_emit_data * emit_data)
3362 {
3363    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3364 
3365    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3366                FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3367 }
3368 
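/**
 * Translate a TGSI texture target into the number of coordinate
 * dimensions and the index of the coordinate holding the array layer
 * (0 when there is no separate layer coordinate).
 */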
3369 static void target_to_dims_layer(unsigned target,
3370                                  unsigned *dims,
3371                                  unsigned *layer_coord)
3372 {
3373    *layer_coord = 0;
3374    switch (target) {
3375    case TGSI_TEXTURE_1D:
3376    case TGSI_TEXTURE_BUFFER:
3377       *dims = 1;
3378       break;
3379    case TGSI_TEXTURE_1D_ARRAY:
3380       *layer_coord = 1;
3381       *dims = 1;
3382       break;
3383    case TGSI_TEXTURE_2D:
3384    case TGSI_TEXTURE_RECT:
3385       *dims = 2;
3386       break;
3387    case TGSI_TEXTURE_2D_ARRAY:
3388       *layer_coord = 2;
3389       *dims = 2;
3390       break;
3391    case TGSI_TEXTURE_3D:
3392    case TGSI_TEXTURE_CUBE:
3393    case TGSI_TEXTURE_CUBE_ARRAY:
3394       *dims = 3;
3395       break;
3396    default:
3397       assert(0);
3398       *dims = 0;
3399       return;
3400    }
3401 }
3402 
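/**
 * Emit an image LOAD: gather the coordinates for the given target,
 * fill in lp_img_params and dispatch to the image code generator.
 */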
3403 static void
3404 img_load_emit(
3405    const struct lp_build_tgsi_action * action,
3406    struct lp_build_tgsi_context * bld_base,
3407    struct lp_build_emit_data * emit_data)
3408 {
3409    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3410    struct lp_img_params params;
3411    LLVMValueRef coords[5];
3412    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3413    unsigned dims;
3414    unsigned target = emit_data->inst->Memory.Texture;
3415    unsigned layer_coord;
3416 
3417    target_to_dims_layer(target, &dims, &layer_coord);
3418 
3419    for (unsigned i = 0; i < dims; i++) {
3420       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3421    }
3422    for (unsigned i = dims; i < 5; i++) {
3423       coords[i] = coord_undef;
3424    }
3425    if (layer_coord)
3426       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3427 
3428    memset(&params, 0, sizeof(params));
3429 
3430    params.type = bld->bld_base.base.type;
3431    params.context_ptr = bld->context_ptr;
3432    params.thread_data_ptr = bld->thread_data_ptr;
3433    params.coords = coords;
3434    params.outdata = emit_data->output;
3435    params.target = tgsi_to_pipe_tex_target(target);
3436    params.image_index = emit_data->inst->Src[0].Register.Index;
3437    params.img_op = LP_IMG_LOAD;
3438    bld->image->emit_op(bld->image,
3439                          bld->bld_base.base.gallivm,
3440                          &params);
3441 }
3442 
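/**
 * TGSI LOAD from a buffer-like resource.  Images are forwarded to
 * img_load_emit(), constant buffers use a bounds-checked gather, and
 * SSBO / shared memory loads are scalarized into a per-lane loop that
 * only reads for active (and, for SSBOs, in-bounds) lanes.
 */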
3443 static void
3444 load_emit(
3445    const struct lp_build_tgsi_action * action,
3446    struct lp_build_tgsi_context * bld_base,
3447    struct lp_build_emit_data * emit_data)
3448 {
3449    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3450    struct gallivm_state * gallivm = bld_base->base.gallivm;
3451    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3452    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3453    unsigned buf = bufreg->Register.Index;
3454    assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
3455           bufreg->Register.File == TGSI_FILE_IMAGE ||
3456           bufreg->Register.File == TGSI_FILE_MEMORY ||
3457           bufreg->Register.File == TGSI_FILE_CONSTBUF);
3458    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3459    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3460 
3461    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3462       img_load_emit(action, bld_base, emit_data);
3463    } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
3464       LLVMValueRef consts_ptr = bld->consts[buf];
3465       LLVMValueRef num_consts = bld->consts_sizes[buf];
3466 
3467       LLVMValueRef indirect_index;
3468       LLVMValueRef overflow_mask;
3469 
3470       indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
3471       indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
3472 
3473       /* All fetches are from the same constant buffer, so
3474        * we need to propagate the size to a vector to do a
3475        * vector comparison */
3476       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
3477 
3478       /* Gather values from the constant buffer */
3479       unsigned chan_index;
3480       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3481          /* Construct a boolean vector telling us which channels
3482           * overflow the bound constant buffer */
3483          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
3484                                           indirect_index, num_consts);
3485 
3486          /* index_vec = indirect_index * 4 */
3487          LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
3488          index_vec = lp_build_add(uint_bld, index_vec,
3489                                   lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3490 
3491          emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
3492       }
3493    } else if (0) {
3494       /* for indirect support with ARB_gpu_shader5 */
3495    } else {
3496       LLVMValueRef index;
3497       LLVMValueRef scalar, scalar_ptr;
3498       unsigned chan_index;
3499 
3500       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3501       index = lp_build_shr_imm(uint_bld, index, 2);
3502 
3503       scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3504 
3505       LLVMValueRef ssbo_limit = NULL;
3506 
3507       if (!is_shared) {
3508          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3509          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3510       }
3511 
3512       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3513          LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3514 
3515          LLVMValueRef exec_mask = mask_vec(bld_base);
3516          if (!is_shared) {
3517             LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3518             exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3519          }
3520 
3521          LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3522          struct lp_build_loop_state loop_state;
3523          lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3524 
3525          struct lp_build_if_state ifthen;
3526          LLVMValueRef cond, temp_res;
3527 
3528          loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3529                                               loop_state.counter, "");
3530 
3531          cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3532          cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3533 
3534          lp_build_if(&ifthen, gallivm, cond);
3535          scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3536 
3537          temp_res = LLVMBuildLoad(builder, result, "");
3538          temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3539          LLVMBuildStore(builder, temp_res, result);
3540          lp_build_else(&ifthen);
3541          temp_res = LLVMBuildLoad(builder, result, "");
3542          temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3543          LLVMBuildStore(builder, temp_res, result);
3544          lp_build_endif(&ifthen);
3545          lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3546                                 NULL, LLVMIntUGE);
3547          emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
3548       }
3549    }
3550 }
3551 
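/**
 * Emit an image STORE: like img_load_emit(), but the data to write is
 * fetched from src 1 and the execution mask is passed along so that
 * inactive lanes do not write.
 */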
3552 static void
3553 img_store_emit(
3554    const struct lp_build_tgsi_action * action,
3555    struct lp_build_tgsi_context * bld_base,
3556    struct lp_build_emit_data * emit_data)
3557 {
3558    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3559    struct lp_img_params params;
3560    LLVMValueRef coords[5];
3561    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3562    unsigned dims;
3563    unsigned target = emit_data->inst->Memory.Texture;
3564    unsigned layer_coord;
3565 
3566    target_to_dims_layer(target, &dims, &layer_coord);
3567    for (unsigned i = 0; i < dims; i++) {
3568       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3569    }
3570    for (unsigned i = dims; i < 5; i++) {
3571       coords[i] = coord_undef;
3572    }
3573    if (layer_coord)
3574       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3575    memset(&params, 0, sizeof(params));
3576 
3577    params.type = bld->bld_base.base.type;
3578    params.context_ptr = bld->context_ptr;
3579    params.thread_data_ptr = bld->thread_data_ptr;
3580    params.coords = coords;
3581    params.outdata = NULL;
3582    params.exec_mask = mask_vec(bld_base);
3583    params.target = tgsi_to_pipe_tex_target(target);
3584    params.image_index = emit_data->inst->Dst[0].Register.Index;
3585    params.img_op = LP_IMG_STORE;
3586    for (unsigned i = 0; i < 4; i++)
3587       params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3588 
3589    bld->image->emit_op(bld->image,
3590                        bld->bld_base.base.gallivm,
3591                        &params);
3592 }
3593 
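/**
 * TGSI STORE to a buffer-like resource.  Image stores go through
 * img_store_emit(); SSBO / shared memory stores use a per-lane loop
 * guarded by the execution mask and, for SSBOs, an out-of-bounds check.
 */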
3594 static void
3595 store_emit(
3596    const struct lp_build_tgsi_action * action,
3597    struct lp_build_tgsi_context * bld_base,
3598    struct lp_build_emit_data * emit_data)
3599 {
3600    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3601    struct gallivm_state * gallivm = bld_base->base.gallivm;
3602    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3603    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3604    const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3605    unsigned buf = bufreg->Register.Index;
3606    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3607    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3608 
3609    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3610       img_store_emit(action, bld_base, emit_data);
3611    } else if (0) {
3612 
3613    } else {
3614       LLVMValueRef index;  /* index into the buffer */
3615       LLVMValueRef scalar_ptr;
3616       LLVMValueRef value;
3617       unsigned chan_index;
3618 
3619       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3620       index = lp_build_shr_imm(uint_bld, index, 2);
3621 
3622       scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3623 
3624       LLVMValueRef ssbo_limit = NULL;
3625 
3626       if (!is_shared) {
3627          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3628          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3629       }
3630 
3631       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3632          LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3633 
3634          value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3635 
3636          LLVMValueRef exec_mask = mask_vec(bld_base);
3637          if (!is_shared) {
3638             LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3639             exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3640          }
3641 
3642          struct lp_build_loop_state loop_state;
3643          lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3644 
3645          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3646                                                           loop_state.counter, "");
3647          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3648 
3649          struct lp_build_if_state ifthen;
3650          LLVMValueRef cond;
3651 
3652          loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3653                                               loop_state.counter, "");
3654 
3655          cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3656          cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3657          lp_build_if(&ifthen, gallivm, cond);
3658 
3659          lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3660 
3661          lp_build_endif(&ifthen);
3662          lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3663                                 NULL, LLVMIntUGE);
3664       }
3665    }
3666 }
3667 
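/**
 * TGSI RESQ: query the size of an image via the image code generator,
 * or broadcast the stored size of a shader buffer.
 */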
3668 static void
3669 resq_emit(
3670    const struct lp_build_tgsi_action * action,
3671    struct lp_build_tgsi_context * bld_base,
3672    struct lp_build_emit_data * emit_data)
3673 {
3674    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3675    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3676    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3677 
3678    unsigned buf = bufreg->Register.Index;
3679    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3680 
3681    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3682       unsigned target = emit_data->inst->Memory.Texture;
3683       struct lp_sampler_size_query_params params = { 0 };
3684       params.int_type = bld->bld_base.int_bld.type;
3685       params.texture_unit = buf;
3686       params.target = tgsi_to_pipe_tex_target(target);
3687       params.context_ptr = bld->context_ptr;
3688       params.sizes_out = emit_data->output;
3689 
3690       bld->image->emit_size_query(bld->image,
3691                                   bld->bld_base.base.gallivm,
3692                                   &params);
3693    } else {
3694       LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3695 
3696       emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3697    }
3698 }
3699 
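/**
 * Emit an image atomic: gather the coordinates and source operands
 * (plus the comparison value for ATOMCAS) and hand them off to the
 * image code generator together with the requested LLVM atomic op.
 */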
3700 static void
3701 img_atomic_emit(
3702    const struct lp_build_tgsi_action * action,
3703    struct lp_build_tgsi_context * bld_base,
3704    struct lp_build_emit_data * emit_data,
3705    LLVMAtomicRMWBinOp op)
3706 {
3707    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3708    struct lp_img_params params;
3709    LLVMValueRef coords[5];
3710    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3711    unsigned dims;
3712    unsigned layer_coord;
3713    unsigned target = emit_data->inst->Memory.Texture;
3714 
3715    target_to_dims_layer(target, &dims, &layer_coord);
3716 
3717    for (unsigned i = 0; i < dims; i++) {
3718       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3719    }
3720    for (unsigned i = dims; i < 5; i++) {
3721       coords[i] = coord_undef;
3722    }
3723    if (layer_coord)
3724       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3725    memset(&params, 0, sizeof(params));
3726 
3727    params.type = bld->bld_base.base.type;
3728    params.context_ptr = bld->context_ptr;
3729    params.thread_data_ptr = bld->thread_data_ptr;
3730    params.exec_mask = mask_vec(bld_base);
3731    params.image_index = emit_data->inst->Src[0].Register.Index;
3732    params.coords = coords;
3733    params.target = tgsi_to_pipe_tex_target(target);
3734    params.op = op;
3735    params.outdata = emit_data->output;
3736    params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3737 
3738    for (unsigned i = 0; i < 4; i++)
3739       params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3740    if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3741       for (unsigned i = 0; i < 4; i++)
3742          params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3743    }
3744    bld->image->emit_op(bld->image,
3745                        bld->bld_base.base.gallivm,
3746                        &params);
3747 }
3748 
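/**
 * TGSI atomics on buffers / shared memory.  The TGSI opcode is mapped
 * to an LLVM atomicrmw operation (or cmpxchg for ATOMCAS) and a
 * per-lane loop performs the atomic only for active, in-bounds lanes,
 * collecting the previous values into the destination.
 */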
3749 static void
3750 atomic_emit(
3751    const struct lp_build_tgsi_action * action,
3752    struct lp_build_tgsi_context * bld_base,
3753    struct lp_build_emit_data * emit_data)
3754 {
3755    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3756    struct gallivm_state * gallivm = bld_base->base.gallivm;
3757    LLVMBuilderRef builder = gallivm->builder;
3758    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3759    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3760 
3761    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3762    unsigned buf = bufreg->Register.Index;
3763    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3764 
3765    LLVMAtomicRMWBinOp op = -1;
3766    switch (emit_data->inst->Instruction.Opcode) {
3767    case TGSI_OPCODE_ATOMUADD:
3768       op = LLVMAtomicRMWBinOpAdd;
3769       break;
3770    case TGSI_OPCODE_ATOMXCHG:
3771       op = LLVMAtomicRMWBinOpXchg;
3772       break;
3773    case TGSI_OPCODE_ATOMAND:
3774       op = LLVMAtomicRMWBinOpAnd;
3775       break;
3776    case TGSI_OPCODE_ATOMOR:
3777       op = LLVMAtomicRMWBinOpOr;
3778       break;
3779    case TGSI_OPCODE_ATOMXOR:
3780       op = LLVMAtomicRMWBinOpXor;
3781       break;
3782    case TGSI_OPCODE_ATOMUMIN:
3783       op = LLVMAtomicRMWBinOpUMin;
3784       break;
3785    case TGSI_OPCODE_ATOMUMAX:
3786       op = LLVMAtomicRMWBinOpUMax;
3787       break;
3788    case TGSI_OPCODE_ATOMIMIN:
3789       op = LLVMAtomicRMWBinOpMin;
3790       break;
3791    case TGSI_OPCODE_ATOMIMAX:
3792       op = LLVMAtomicRMWBinOpMax;
3793       break;
3794    case TGSI_OPCODE_ATOMCAS:
3795       break;
3796    default:
3797       assert(0);
3798       return;
3799    }
3800 
3801    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3802       img_atomic_emit(action, bld_base, emit_data, op);
3803    } else if (0) {
3804    } else {
3805       LLVMValueRef index;  /* index into the buffer */
3806       LLVMValueRef scalar, scalar_ptr;
3807       LLVMValueRef value;
3808 
3809       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3810       value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3811 
3812       index = lp_build_shr_imm(uint_bld, index, 2);
3813 
3814       if (!is_shared) {
3815          index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3816          scalar_ptr = bld->ssbos[buf];
3817       } else
3818          scalar_ptr = bld->shared_ptr;
3819 
3820       LLVMValueRef atom_res = lp_build_alloca(gallivm,
3821                                               uint_bld->vec_type, "");
3822 
3823       LLVMValueRef ssbo_limit;
3824       if (!is_shared) {
3825          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3826          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3827       }
3828 
3829       LLVMValueRef exec_mask = mask_vec(bld_base);
3830 
3831       if (!is_shared) {
3832          LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3833          exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3834       }
3835 
3836       struct lp_build_loop_state loop_state;
3837       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3838 
3839       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3840                                                        loop_state.counter, "");
3841       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3842 
3843       index = LLVMBuildExtractElement(gallivm->builder, index,
3844                                       loop_state.counter, "");
3845 
3846       scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3847                                 &index, 1, "");
3848 
3849       struct lp_build_if_state ifthen;
3850       LLVMValueRef cond, temp_res;
3851 
3852       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3853       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3854       lp_build_if(&ifthen, gallivm, cond);
3855 
3856       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3857          LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3858          LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3859                                                             loop_state.counter, "");
3860          cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3861          scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3862                                          cas_src_ptr,
3863                                          LLVMAtomicOrderingSequentiallyConsistent,
3864                                          LLVMAtomicOrderingSequentiallyConsistent,
3865                                          false);
3866          scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3867       } else {
3868          scalar = LLVMBuildAtomicRMW(builder, op,
3869                                      scalar_ptr, value_ptr,
3870                                      LLVMAtomicOrderingSequentiallyConsistent,
3871                                      false);
3872       }
3873       temp_res = LLVMBuildLoad(builder, atom_res, "");
3874       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3875       LLVMBuildStore(builder, temp_res, atom_res);
3876       lp_build_else(&ifthen);
3877       temp_res = LLVMBuildLoad(builder, atom_res, "");
3878       temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3879       LLVMBuildStore(builder, temp_res, atom_res);
3880       lp_build_endif(&ifthen);
3881 
3882       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3883                              NULL, LLVMIntUGE);
3884       emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3885    }
3886 }
3887 
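/**
 * TGSI BARRIER: suspend the shader coroutine and resume in a fresh
 * block, so the caller can run the other invocations of the workgroup
 * up to the same point before any of them continues.
 */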
3888 static void
3889 barrier_emit(
3890    const struct lp_build_tgsi_action * action,
3891    struct lp_build_tgsi_context * bld_base,
3892    struct lp_build_emit_data * emit_data)
3893 {
3894    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3895    struct gallivm_state * gallivm = bld_base->base.gallivm;
3896 
3897    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3898 
3899    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3900    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3901 }
3902 
3903 static void
3904 membar_emit(
3905    const struct lp_build_tgsi_action * action,
3906    struct lp_build_tgsi_context * bld_base,
3907    struct lp_build_emit_data * emit_data)
3908 {
3909    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3910    LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3911 }
3912 
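/**
 * Add 1 to each active lane of the counter vector at *ptr.  Active
 * lanes of the mask are ~0 (i.e. -1), so subtracting the mask adds 1
 * per active lane and leaves inactive lanes untouched.
 */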
3913 static void
3914 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3915                           LLVMValueRef ptr,
3916                           LLVMValueRef mask)
3917 {
3918    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3919    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3920 
3921    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3922 
3923    LLVMBuildStore(builder, current_vec, ptr);
3924 }
3925 
3926 static void
3927 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3928                              LLVMValueRef ptr,
3929                              LLVMValueRef mask)
3930 {
3931    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3932    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3933 
3934    current_vec = lp_build_select(&bld_base->uint_bld,
3935                                  mask,
3936                                  bld_base->uint_bld.zero,
3937                                  current_vec);
3938 
3939    LLVMBuildStore(builder, current_vec, ptr);
3940 }
3941 
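/**
 * Clear mask bits for lanes that have already emitted the maximum
 * number of output vertices, so further EMITs become no-ops for them.
 */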
3942 static LLVMValueRef
3943 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3944                                   LLVMValueRef current_mask_vec,
3945                                   LLVMValueRef total_emitted_vertices_vec)
3946 {
3947    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3948    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3949    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3950                                         total_emitted_vertices_vec,
3951                                         bld->max_output_vertices_vec);
3952 
3953    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3954 }
3955 
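/**
 * GS EMIT: gather the current outputs and pass them to the geometry
 * shader interface together with the (clamped) execution mask and the
 * stream id, then bump the emitted-vertex counters for active lanes.
 */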
3956 static void
3957 emit_vertex(
3958    const struct lp_build_tgsi_action * action,
3959    struct lp_build_tgsi_context * bld_base,
3960    struct lp_build_emit_data * emit_data)
3961 {
3962    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3963    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3964 
3965    if (bld->gs_iface->emit_vertex) {
3966       LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3967                                                     TGSI_TYPE_UNSIGNED,
3968                                                     emit_data->inst->Src[0].Register.SwizzleX);
3969       LLVMValueRef mask = mask_vec(bld_base);
3970       LLVMValueRef total_emitted_vertices_vec =
3971          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3972 
3973       mask = clamp_mask_to_max_output_vertices(bld, mask,
3974                                                total_emitted_vertices_vec);
3975       gather_outputs(bld);
3976       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3977                                  bld->outputs,
3978                                  total_emitted_vertices_vec,
3979                                  mask,
3980                                  stream_id);
3981       increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3982                                 mask);
3983       increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3984                                 mask);
3985 #if DUMP_GS_EMITS
3986       lp_build_print_value(bld->bld_base.base.gallivm,
3987                            " +++ emit vertex masked ones = ",
3988                            mask);
3989       lp_build_print_value(bld->bld_base.base.gallivm,
3990                            " +++ emit vertex emitted = ",
3991                            total_emitted_vertices_vec);
3992 #endif
3993    }
3994 }
3995 
3996 
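/*
 * Finish the primitive currently being built, but only for the lanes
 * selected by 'mask' that actually have unflushed vertices.  For those
 * lanes the emitted-primitive counter is incremented and the per-primitive
 * vertex counter is reset to zero.
 */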
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives.  This way we make sure that end_primitive
         executes only on the paths that have unflushed vertices. */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
                                   total_emitted_vertices_vec,
                                   emitted_vertices_vec,
                                   emitted_prims_vec,
                                   mask_vec(bld_base), 0);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}

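/*
 * TGSI_OPCODE_ENDPRIM: end the current primitive for all lanes that are
 * live under the current execution mask.
 */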
static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef mask = mask_vec(bld_base);
      end_primitive_masked(bld_base, mask);
   }
}

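/*
 * TGSI_OPCODE_BARRIER for tessellation control shaders: forwarded to the
 * TCS interface, which is expected to synchronize the patch invocations.
 */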
static void
barrier_emit_tcs(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->tcs_iface->emit_barrier) {
      bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
   }
}


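/*
 * The handlers below map the TGSI control-flow opcodes (CAL/RET, BRK/CONT,
 * IF/UIF/ELSE/ENDIF, SWITCH/CASE/DEFAULT/ENDSWITCH, BGNLOOP/ENDLOOP,
 * BGNSUB/ENDSUB) onto updates of the SoA execution mask via the
 * lp_exec_mask helpers.
 */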
static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}

static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}

static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_tgsi_break(&bld->exec_mask, bld_base);
}

static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}

static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask, true);
}

static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

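/*
 * Shader prologue: allocate the arrays needed for indirectly addressed
 * register files (temporaries, outputs, immediates, inputs), copy the
 * inputs into their array when they can be indexed indirectly, and set up
 * the per-lane vertex/primitive counters used by geometry shaders.
 */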
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
      bld->temps_array = lp_build_alloca_undef(gallivm,
                                               LLVMArrayType(bld_base->base.vec_type, array_size),
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
      bld->imms_array = lp_build_alloca_undef(gallivm,
                                              LLVMArrayType(bld_base->base.vec_type, array_size),
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs, we need to copy them into
    * our alloca array to be able to iterate over them. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
       !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}

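/*
 * Called after the declarations have been processed; currently only used
 * to give the TCS interface a chance to emit its own prologue.
 */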
static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
      bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
   }
}

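/*
 * Shader epilogue: optionally dump the outputs for debugging, let the TCS
 * interface emit its epilogue, flush any pending geometry-shader
 * primitives and report the final vertex/primitive counts, or, for
 * ordinary shaders, copy the outputs back out of the alloca array.
 */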
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
      bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
   }

   /* If we have indirect addressing in outputs, we need to copy our alloca
    * array to the output slots specified by the caller. */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
         vertices in the cache.  Note that we must not call end_primitive
         here since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec, 0);
   } else {
      gather_outputs(bld);
   }
}

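/**
 * Translate a TGSI token stream into LLVM IR using SoA layout.
 *
 * Sets up the per-type build contexts, hooks up the fetch/store and opcode
 * emit callbacks (including the GS/TCS/TES-specific ones when the
 * corresponding interfaces are supplied in 'params'), and then walks the
 * tokens with lp_build_tgsi_llvm().  The generated code writes its results
 * to the 'outputs' array provided by the caller.
 */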
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  const struct lp_build_tgsi_params *params,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
{
   struct lp_build_tgsi_soa_context bld;
   struct lp_type type = params->type;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.const_sizes_ptr = params->const_sizes_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
   bld.sampler = params->sampler;
   bld.bld_base.info = params->info;
   bld.indirect_files = params->info->indirect_files;
   bld.context_ptr = params->context_ptr;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.coro = params->coro;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons, immediates are normally backed by a static
    * array, but if there are too many of them we fall back to a
    * dynamically allocated array instead.
    */
   bld.use_immediates_array =
         (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;

   bld.bld_base.emit_store = emit_store;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;

   if (params->gs_iface) {
      /* This property should always be set, but apps using
       * ext_geometry_shader4 quite often forget it, so fall back to
       * MAX_VERTEX_VARYING from that spec.  We could debug_assert if it's
       * not set, but that would be a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = params->gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   if (params->tes_iface) {
      /* inputs are always indirect with tes */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.tes_iface = params->tes_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
   }

   if (params->tcs_iface) {
      bld.tcs_iface = params->tcs_iface;
      /* outputs and inputs are always indirect with tcs */
      bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
      bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *params->system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
   lp_exec_mask_fini(&bld.exec_mask);
}