1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_arit.h"
34 #include "lp_bld_bitarit.h"
35 #include "lp_bld_coro.h"
36 #include "lp_bld_printf.h"
37 #include "util/u_math.h"
38 
/**
 * Translate an element width in bits into the shift amount that converts
 * an element index into a byte offset (i.e. log2 of the size in bytes).
 *
 * \param bit_size  element width in bits
 * \return 3 for 64, 1 for 16, 0 for 8; 2 for 32 and for any other value.
 */
static int bit_size_to_shift_size(int bit_size)
{
   if (bit_size == 64)
      return 3;
   if (bit_size == 16)
      return 1;
   if (bit_size == 8)
      return 0;

   /* 32-bit elements, and every unrecognised width, use the 4-byte shift. */
   return 2;
}
53 
54 /*
55  * combine the execution mask if there is one with the current mask.
56  */
57 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)58 mask_vec(struct lp_build_nir_context *bld_base)
59 {
60    struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
61    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
62    struct lp_exec_mask *exec_mask = &bld->exec_mask;
63    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
64    if (!exec_mask->has_mask) {
65       return bld_mask;
66    }
67    if (!bld_mask)
68       return exec_mask->exec_mask;
69    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
70                        exec_mask->exec_mask, "");
71 }
72 
73 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)74 emit_fetch_64bit(
75    struct lp_build_nir_context * bld_base,
76    LLVMValueRef input,
77    LLVMValueRef input2)
78 {
79    struct gallivm_state *gallivm = bld_base->base.gallivm;
80    LLVMBuilderRef builder = gallivm->builder;
81    LLVMValueRef res;
82    int i;
83    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
84    int len = bld_base->base.type.length * 2;
85    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
86 
87    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
88 #if UTIL_ARCH_LITTLE_ENDIAN
89       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
90       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
91 #else
92       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
93       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
94 #endif
95    }
96    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
97 
98    return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
99 }
100 
101 static void
emit_store_64bit_split(struct lp_build_nir_context * bld_base,LLVMValueRef value,LLVMValueRef split_values[2])102 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
103                        LLVMValueRef value,
104                        LLVMValueRef split_values[2])
105 {
106    struct gallivm_state *gallivm = bld_base->base.gallivm;
107    LLVMBuilderRef builder = gallivm->builder;
108    unsigned i;
109    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
110    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
111    int len = bld_base->base.type.length * 2;
112 
113    value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
114    for (i = 0; i < bld_base->base.type.length; i++) {
115 #if UTIL_ARCH_LITTLE_ENDIAN
116       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
117       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
118 #else
119       shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
120       shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
121 #endif
122    }
123 
124    split_values[0] = LLVMBuildShuffleVector(builder, value,
125                                  LLVMGetUndef(LLVMTypeOf(value)),
126                                  LLVMConstVector(shuffles,
127                                                  bld_base->base.type.length),
128                                  "");
129    split_values[1] = LLVMBuildShuffleVector(builder, value,
130                                   LLVMGetUndef(LLVMTypeOf(value)),
131                                   LLVMConstVector(shuffles2,
132                                                   bld_base->base.type.length),
133                                   "");
134 }
135 
136 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)137 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
138                       LLVMValueRef chan_ptr,
139                       LLVMValueRef chan_ptr2,
140                       LLVMValueRef value)
141 {
142    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
143    struct lp_build_context *float_bld = &bld_base->base;
144    LLVMValueRef split_vals[2];
145 
146    emit_store_64bit_split(bld_base, value, split_vals);
147 
148    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
149    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
150 }
151 
152 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)153 get_soa_array_offsets(struct lp_build_context *uint_bld,
154                       LLVMValueRef indirect_index,
155                       int num_components,
156                       unsigned chan_index,
157                       bool need_perelement_offset)
158 {
159    struct gallivm_state *gallivm = uint_bld->gallivm;
160    LLVMValueRef chan_vec =
161       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
162    LLVMValueRef length_vec =
163       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
164    LLVMValueRef index_vec;
165 
166    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
167    index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
168    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
169    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
170 
171    if (need_perelement_offset) {
172       LLVMValueRef pixel_offsets;
173       unsigned i;
174      /* build pixel offset vector: {0, 1, 2, 3, ...} */
175       pixel_offsets = uint_bld->undef;
176       for (i = 0; i < uint_bld->type.length; i++) {
177          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
178          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
179                                                 ii, ii, "");
180       }
181       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
182    }
183    return index_vec;
184 }
185 
186 static LLVMValueRef
build_gather(struct lp_build_nir_context * bld_base,struct lp_build_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)187 build_gather(struct lp_build_nir_context *bld_base,
188              struct lp_build_context *bld,
189              LLVMValueRef base_ptr,
190              LLVMValueRef indexes,
191              LLVMValueRef overflow_mask,
192              LLVMValueRef indexes2)
193 {
194    struct gallivm_state *gallivm = bld_base->base.gallivm;
195    LLVMBuilderRef builder = gallivm->builder;
196    struct lp_build_context *uint_bld = &bld_base->uint_bld;
197    LLVMValueRef res;
198    unsigned i;
199 
200    if (indexes2)
201       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
202    else
203       res = bld->undef;
204    /*
205     * overflow_mask is a vector telling us which channels
206     * in the vector overflowed. We use the overflow behavior for
207     * constant buffers which is defined as:
208     * Out of bounds access to constant buffer returns 0 in all
209     * components. Out of bounds behavior is always with respect
210     * to the size of the buffer bound at that slot.
211     */
212 
213    if (overflow_mask) {
214       /*
215        * We avoid per-element control flow here (also due to llvm going crazy,
216        * though I suspect it's better anyway since overflow is likely rare).
217        * Note that since we still fetch from buffers even if num_elements was
218        * zero (in this case we'll fetch from index zero) the jit func callers
219        * MUST provide valid fake constant buffers of size 4x32 (the values do
220        * not matter), otherwise we'd still need (not per element though)
221        * control flow.
222        */
223       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
224       if (indexes2)
225          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
226    }
227 
228    /*
229     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
230     */
231    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
232       LLVMValueRef si, di;
233       LLVMValueRef index;
234       LLVMValueRef scalar_ptr, scalar;
235 
236       di = lp_build_const_int32(gallivm, i);
237       if (indexes2)
238          si = lp_build_const_int32(gallivm, i >> 1);
239       else
240          si = di;
241 
242       if (indexes2 && (i & 1)) {
243          index = LLVMBuildExtractElement(builder,
244                                          indexes2, si, "");
245       } else {
246          index = LLVMBuildExtractElement(builder,
247                                          indexes, si, "");
248       }
249       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
250                                 &index, 1, "gather_ptr");
251       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
252 
253       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
254    }
255 
256    if (overflow_mask) {
257       if (indexes2) {
258          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
259          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
260                                        bld_base->dbl_bld.int_vec_type, "");
261          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
262                                bld_base->dbl_bld.zero, res);
263       } else
264          res = lp_build_select(bld, overflow_mask, bld->zero, res);
265    }
266 
267    return res;
268 }
269 
270 /**
271  * Scatter/store vector.
272  */
273 static void
emit_mask_scatter(struct lp_build_nir_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)274 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
275                   LLVMValueRef base_ptr,
276                   LLVMValueRef indexes,
277                   LLVMValueRef values,
278                   struct lp_exec_mask *mask)
279 {
280    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
281    LLVMBuilderRef builder = gallivm->builder;
282    unsigned i;
283    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
284 
285    /*
286     * Loop over elements of index_vec, store scalar value.
287     */
288    for (i = 0; i < bld->bld_base.base.type.length; i++) {
289       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
290       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
291       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
292       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
293       LLVMValueRef scalar_pred = pred ?
294          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
295 
296       if (0)
297          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
298                          ii, val, index, scalar_ptr);
299 
300       if (scalar_pred) {
301          LLVMValueRef real_val, dst_val;
302          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
303          real_val = lp_build_select(&bld->uint_elem_bld, scalar_pred, val, dst_val);
304          LLVMBuildStore(builder, real_val, scalar_ptr);
305       }
306       else {
307          LLVMBuildStore(builder, val, scalar_ptr);
308       }
309    }
310 }
311 
/**
 * Load the components of a shader input or output variable.
 *
 * Only nir_var_shader_in and nir_var_shader_out are handled; any other
 * mode leaves \p result untouched.
 *
 * Inputs are fetched through the geometry, tessellation-evaluation or
 * tessellation-control interface when one is set on the context, otherwise
 * from the context's own input storage (inputs_array for indirect access or
 * when inputs are flagged indirect, the inputs[][] table otherwise).
 *
 * Outputs are read back through fs_iface->fb_fetch when it exists, or
 * through the tessellation-control interface; with neither, \p result is
 * not written.
 *
 * \param bld_base            NIR build context
 * \param deref_mode          variable mode being loaded
 * \param num_components      number of entries to produce in \p result
 * \param bit_size            component width; for 64 every component is
 *                            built from two consecutive 32-bit channels
 * \param var                 variable; driver_location, location_frac,
 *                            compact and patch are consulted
 * \param vertex_index        constant vertex index, used when
 *                            \p indir_vertex_index is NULL (the geometry
 *                            interface always uses it)
 * \param indir_vertex_index  optional dynamic vertex index
 * \param const_index         constant array offset; added to the location
 *                            for non-compact variables without an indirect
 *                            index, split into slot (/ 4) and channel (% 4)
 *                            for compact variables
 * \param indir_index         optional dynamic array index
 * \param result              receives one value per component
 */
static void emit_load_var(struct lp_build_nir_context *bld_base,
                           nir_variable_mode deref_mode,
                           unsigned num_components,
                           unsigned bit_size,
                           nir_variable *var,
                           unsigned vertex_index,
                           LLVMValueRef indir_vertex_index,
                           unsigned const_index,
                           LLVMValueRef indir_index,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* A 64-bit component occupies two 32-bit channels. */
   int dmul = bit_size == 64 ? 2 : 1;
   unsigned location = var->data.driver_location;
   unsigned location_frac = var->data.location_frac;

   if (!var->data.compact && !indir_index)
      location += const_index;
   else if (var->data.compact) {
      /* Compact arrays pack four elements per slot. */
      location += const_index / 4;
      location_frac += const_index % 4;
      const_index = 0;
   }
   switch (deref_mode) {
   case nir_var_shader_in:
      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         int comp_loc = location;

         /* 64-bit components past the fourth channel spill into the next slot. */
         if (bit_size == 64 && idx >= 4) {
            comp_loc++;
            idx = idx % 4;
         }

         if (bld->gs_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                   false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                                 false, vertex_index_val, 0, attrib_index_val, swizzle_index_val2);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else if (bld->tes_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index) {
               if (var->data.compact) {
                  /* Compact arrays index the channel, not the slot. */
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);

            if (var->data.patch) {
               result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                             indir_index ? true : false, attrib_index_val, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, idx + 1);
                  LLVMValueRef result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                                           indir_index ? true : false, attrib_index_val, swizzle_index_val2);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
            else {
               result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                              indir_vertex_index ? true : false,
                                                              indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                              (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                              (indir_index && var->data.compact) ? true : false, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, idx + 1);
                  LLVMValueRef result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                                            indir_vertex_index ? true : false,
                                                                            indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                            indir_index ? true : false, attrib_index_val, false, swizzle_index_val2);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
         } else if (bld->tcs_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index) {
               if (var->data.compact) {
                  /* Compact arrays index the channel, not the slot. */
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                         (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                         (indir_index && var->data.compact) ? true : false, swizzle_index_val);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                                       indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                       indir_index ? true : false, attrib_index_val,
                                                                       false, swizzle_index_val2);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else {
            if (indir_index) {
               LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
               LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                                              attrib_index_val, 4, idx,
                                                              true);
               LLVMValueRef index_vec2 = NULL;
               LLVMTypeRef fptr_type;
               LLVMValueRef inputs_array;
               fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
               inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");

               /*
                * The second 32-bit half lives in the next channel of the
                * same attribute slot, so it must be addressed from
                * attrib_index_val (indir_index + comp_loc) as well, not
                * from the bare indirect index.
                */
               if (bit_size == 64)
                  index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                                     attrib_index_val, 4, idx + 1, true);

               /* Gather values from the input register array */
               result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
            } else {
               if (bld->indirects & nir_var_shader_in) {
                  /* Inputs live in the flat array: four channels per slot. */
                  LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                             comp_loc * 4 + idx);
                  LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
                                                                bld->inputs_array, lindex);
                  if (bit_size == 64) {
                     LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
                                                                 comp_loc * 4 + (idx + 1));
                     LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
                                                                    bld->inputs_array, lindex2);
                     result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
                  } else {
                     result[i] = input_ptr;
                  }
               } else {
                  if (bit_size == 64) {
                     LLVMValueRef tmp[2];
                     tmp[0] = bld->inputs[comp_loc][idx];
                     tmp[1] = bld->inputs[comp_loc][idx + 1];
                     result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
                  } else {
                     result[i] = bld->inputs[comp_loc][idx];
                  }
               }
            }
         }
      }
      break;
   case nir_var_shader_out:
      /* Framebuffer fetch fills the whole result in one go. */
      if (bld->fs_iface && bld->fs_iface->fb_fetch) {
         bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result);
         return;
      }
      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         if (bld->tcs_iface) {
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index)
               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
            else
               attrib_index_val = lp_build_const_int32(gallivm, location);

            result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                        indir_index ? true : false, attrib_index_val,
                                                                        false, swizzle_index_val2, 0);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         }
      }
      break;
   default:
      break;
   }
}
509 
/**
 * Store one component into the context's output storage.
 *
 * A 32-bit (or narrower) value is bitcast to the float vector type and
 * written to outputs[location][chan + comp]. A 64-bit value occupies the
 * two channels starting at chan * 2 + comp; when that start is 4 or more it
 * wraps into the next location.
 *
 * \param bld_base    NIR build context
 * \param deref_mode  variable mode (not consulted here)
 * \param bit_size    component width in bits
 * \param location    output slot
 * \param comp        first channel of the variable within its slot
 * \param chan        component number within the variable
 * \param dst         value to store
 */
static void emit_store_chan(struct lp_build_nir_context *bld_base,
                            nir_variable_mode deref_mode,
                            unsigned bit_size,
                            unsigned location, unsigned comp,
                            unsigned chan,
                            LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = soa->bld_base.base.gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;

   if (bit_size != 64) {
      LLVMValueRef as_float = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
      lp_exec_mask_store(&soa->exec_mask, float_bld, as_float,
                         soa->outputs[location][chan + comp]);
      return;
   }

   /* 64-bit: two channels per component, possibly spilling to the next slot. */
   unsigned first_chan = chan * 2 + comp;
   if (first_chan >= 4) {
      first_chan -= 4;
      location++;
   }
   emit_store_64bit_chan(bld_base, soa->outputs[location][first_chan],
                         soa->outputs[location][first_chan + 1], dst);
}
536 
/**
 * Store one component of an output through the tessellation-control
 * interface.
 *
 * The channel is chan + comp for 32-bit values. For 64-bit values it is
 * chan * 2 + comp, wrapping into the next location when it reaches 4, and
 * the value is split into two 32-bit halves stored to consecutive channels.
 * Every store is issued with the mask returned by mask_vec().
 *
 * \param bld_base            NIR build context
 * \param is_compact          variable is a compact array; an indirect index
 *                            then selects the channel rather than the slot
 * \param bit_size            component width in bits
 * \param location            output slot of the variable
 * \param const_index         constant slot offset, added to \p location
 *                            unless a non-compact indirect index is used
 * \param indir_vertex_index  optional dynamic vertex index
 * \param indir_index         optional dynamic array index
 * \param comp                first channel of the variable within its slot
 * \param chan                component number within the variable
 * \param chan_val            value to store
 */
static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
                                bool is_compact,
                                unsigned bit_size,
                                unsigned location,
                                unsigned const_index,
                                LLVMValueRef indir_vertex_index,
                                LLVMValueRef indir_index,
                                unsigned comp,
                                unsigned chan,
                                LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = soa->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const bool vertex_is_indirect = indir_vertex_index ? true : false;
   unsigned swizzle;

   if (bit_size == 64) {
      swizzle = chan * 2 + comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else {
      swizzle = chan + comp;
   }

   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef attrib_index_val;

   if (indir_index && is_compact) {
      /* Compact array: the dynamic index moves the channel. */
      swizzle_index_val = lp_build_add(uint_bld, indir_index,
                                       lp_build_const_int_vec(gallivm, uint_bld->type, swizzle));
      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
   } else if (indir_index) {
      /* Regular array: the dynamic index moves the slot. */
      attrib_index_val = lp_build_add(uint_bld, indir_index,
                                      lp_build_const_int_vec(gallivm, uint_bld->type, location));
   } else {
      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
   }

   if (bit_size != 64) {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      soa->tcs_iface->emit_store_output(soa->tcs_iface, &bld_base->base, 0,
                                        vertex_is_indirect,
                                        indir_vertex_index,
                                        indir_index && !is_compact ? true : false,
                                        attrib_index_val,
                                        indir_index && is_compact ? true : false,
                                        swizzle_index_val,
                                        chan_val, mask_vec(bld_base));
      return;
   }

   /* 64-bit: one store per 32-bit half, to consecutive channels. */
   LLVMValueRef half_swizzles[2] = {
      swizzle_index_val,
      lp_build_const_int32(gallivm, swizzle + 1),
   };
   LLVMValueRef halves[2];

   emit_store_64bit_split(bld_base, chan_val, halves);
   for (unsigned half = 0; half < 2; half++) {
      soa->tcs_iface->emit_store_output(soa->tcs_iface, &bld_base->base, 0,
                                        vertex_is_indirect,
                                        indir_vertex_index,
                                        indir_index ? true : false,
                                        attrib_index_val,
                                        false, half_swizzles[half],
                                        halves[half], mask_vec(bld_base));
   }
}
602 
/**
 * Store to a shader output variable.
 *
 * Only nir_var_shader_out is handled; other modes are ignored. Each
 * component enabled in \p writemask is stored through emit_store_tcs_chan()
 * when a tessellation-control interface is set, and through
 * emit_store_chan() otherwise.
 *
 * \param bld_base            NIR build context
 * \param deref_mode          variable mode being stored
 * \param num_components      number of components in \p dst
 * \param bit_size            component width in bits
 * \param var                 destination variable
 * \param writemask           bit N set means component N is written
 * \param indir_vertex_index  optional dynamic vertex index
 * \param const_index         constant array offset; for compact variables
 *                            it is split into slot (/ 4) and channel (% 4)
 * \param indir_index         optional dynamic array index
 * \param dst                 value to store; used directly when there is a
 *                            single component, otherwise components are
 *                            taken with LLVMBuildExtractValue
 */
static void emit_store_var(struct lp_build_nir_context *bld_base,
                           nir_variable_mode deref_mode,
                           unsigned num_components,
                           unsigned bit_size,
                           nir_variable *var,
                           unsigned writemask,
                           LLVMValueRef indir_vertex_index,
                           unsigned const_index,
                           LLVMValueRef indir_index,
                           LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = soa->bld_base.base.gallivm->builder;

   if (deref_mode != nir_var_shader_out)
      return;

   unsigned location = var->data.driver_location;
   unsigned comp = var->data.location_frac;

   /* Fragment stencil and depth results use fixed channels. */
   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      switch (var->data.location) {
      case FRAG_RESULT_STENCIL:
         comp = 1;
         break;
      case FRAG_RESULT_DEPTH:
         comp = 2;
         break;
      default:
         break;
      }
   }

   /* Compact arrays pack four elements per slot. */
   if (var->data.compact) {
      location += const_index / 4;
      comp += const_index % 4;
      const_index = 0;
   }

   for (unsigned chan = 0; chan < num_components; chan++) {
      if (!(writemask & (1u << chan)))
         continue;

      LLVMValueRef chan_val = dst;
      if (num_components != 1)
         chan_val = LLVMBuildExtractValue(builder, dst, chan, "");

      if (soa->tcs_iface)
         emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location,
                             const_index, indir_vertex_index, indir_index,
                             comp, chan, chan_val);
      else
         emit_store_chan(bld_base, deref_mode, bit_size,
                         location + const_index, comp, chan, chan_val);
   }
}
648 
/*
 * Load all components of a NIR register from `reg_storage`.
 *
 * Non-array registers are read with one plain load per component (the
 * storage pointer itself when there is a single component, otherwise the
 * i-th element pointer of the storage array).
 *
 * Array registers are read with a gather per component.  The element index
 * is reg->base_offset, plus `indir_src` when the access is indirect; the
 * indirect index is clamped to num_array_elems - 1 before use.
 *
 * Returns the single value when the register has one component, otherwise
 * the values packed by lp_nir_array_build_gather_values().
 */
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
                                  struct lp_build_context *reg_bld,
                                  const nir_reg_src *reg,
                                  LLVMValueRef indir_src,
                                  LLVMValueRef reg_storage)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   int nc = reg->reg->num_components;
   LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };

   if (!reg->reg->num_array_elems) {
      for (unsigned i = 0; i < nc; i++) {
         LLVMValueRef chan_ptr = reg_storage;
         if (nc != 1)
            chan_ptr = lp_build_array_get_ptr(gallivm, reg_storage,
                                              lp_build_const_int32(gallivm, i));
         vals[i] = LLVMBuildLoad(builder, chan_ptr, "");
      }
   } else {
      LLVMValueRef elem_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);

      if (reg->indirect) {
         /* Clamp the indirect index to the last array element. */
         LLVMValueRef last_elem = lp_build_const_int_vec(gallivm, uint_bld->type,
                                                         reg->reg->num_array_elems - 1);
         elem_index = LLVMBuildAdd(builder, elem_index, indir_src, "");
         elem_index = lp_build_min(uint_bld, elem_index, last_elem);
      }

      LLVMTypeRef elem_ptr_type = LLVMPointerType(reg_bld->elem_type, 0);
      reg_storage = LLVMBuildBitCast(builder, reg_storage, elem_ptr_type, "");

      for (unsigned i = 0; i < nc; i++) {
         LLVMValueRef lane_offsets = get_soa_array_offsets(uint_bld, elem_index, nc, i, TRUE);
         vals[i] = build_gather(bld_base, reg_bld, reg_storage, lane_offsets, NULL, NULL);
      }
   }

   if (nc == 1)
      return vals[0];
   return lp_nir_array_build_gather_values(builder, vals, nc);
}
681 
/*
 * Store component values `dst[]` into a NIR register backed by
 * `reg_storage`.
 *
 * Array registers: the element index is reg->base_offset, plus `indir_src`
 * for indirect accesses (clamped to num_array_elems - 1).  Each component
 * enabled in `writemask` is bitcast to the register vector type and written
 * with a masked scatter under the current exec mask.
 *
 * Non-array registers: every component is bitcast and written through
 * lp_exec_mask_store().
 *
 * NOTE(review): the non-array path does not consult `writemask`, unlike the
 * array path -- confirm against the callers whether that is intentional.
 *
 * dst[] entries that are stored are overwritten with their bitcast value.
 */
static void emit_store_reg(struct lp_build_nir_context *bld_base,
                           struct lp_build_context *reg_bld,
                           const nir_reg_dest *reg,
                           unsigned writemask,
                           LLVMValueRef indir_src,
                           LLVMValueRef reg_storage,
                           LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   int nc = reg->reg->num_components;
   const bool is_array = reg->reg->num_array_elems > 0;

   if (is_array) {
      LLVMValueRef elem_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);

      if (reg->indirect) {
         LLVMValueRef last_elem = lp_build_const_int_vec(gallivm, uint_bld->type,
                                                         reg->reg->num_array_elems - 1);
         elem_index = LLVMBuildAdd(builder, elem_index, indir_src, "");
         elem_index = lp_build_min(uint_bld, elem_index, last_elem);
      }

      LLVMTypeRef elem_ptr_type = LLVMPointerType(reg_bld->elem_type, 0);
      reg_storage = LLVMBuildBitCast(builder, reg_storage, elem_ptr_type, "");

      for (unsigned i = 0; i < nc; i++) {
         if (writemask & (1 << i)) {
            LLVMValueRef lane_offsets = get_soa_array_offsets(uint_bld, elem_index, nc, i, TRUE);
            dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
            emit_mask_scatter(bld, reg_storage, lane_offsets, dst[i], &bld->exec_mask);
         }
      }
      return;
   }

   for (unsigned i = 0; i < nc; i++) {
      LLVMValueRef chan_ptr = reg_storage;
      if (nc != 1)
         chan_ptr = lp_build_array_get_ptr(gallivm, reg_storage,
                                           lp_build_const_int32(gallivm, i));
      dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], chan_ptr);
   }
}
720 
/*
 * Load `nc` consecutive components of `bit_size` bits from the kernel
 * argument buffer (bld->kernel_args_ptr).
 *
 * `offset` is a vector of byte offsets of `offset_bit_size` bits; it is
 * converted to an element index by shifting right by log2(bit_size / 8).
 * Only uniform offsets are supported: lane 0 of the offset is used for all
 * lanes and each loaded scalar is broadcast into result[c].
 *
 * A non-uniform offset previously fell through silently, leaving result[]
 * unwritten for the caller to trip over later; it is now reported explicitly
 * as an unsupported path.
 */
static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
                                 unsigned nc,
                                 unsigned bit_size,
                                 unsigned offset_bit_size,
                                 bool offset_is_uniform,
                                 LLVMValueRef offset,
                                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
   unsigned size_shift = bit_size_to_shift_size(bit_size);
   struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);

   /* Byte offset -> element index for the requested element size. */
   if (size_shift)
      offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));

   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
   kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");

   if (offset_is_uniform) {
      /* All lanes share the same offset, so lane 0 is representative. */
      offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         /* The constant must match the scalar offset's integer width. */
         LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
         result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   } else {
      unreachable("load_kernel_arg must have a uniform offset.");
   }
}
753 
/*
 * Convert an integer address into a pointer to an integer of `bit_size`
 * bits (address space 0).  8, 16 and 64 select the matching integer type;
 * any other size, including 32, yields a pointer to a 32-bit integer.
 */
static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
{
   LLVMTypeRef pointee;

   if (bit_size == 8)
      pointee = LLVMInt8TypeInContext(gallivm->context);
   else if (bit_size == 16)
      pointee = LLVMInt16TypeInContext(gallivm->context);
   else if (bit_size == 64)
      pointee = LLVMInt64TypeInContext(gallivm->context);
   else
      pointee = LLVMInt32TypeInContext(gallivm->context);

   return LLVMBuildIntToPtr(gallivm->builder, addr_ptr, LLVMPointerType(pointee, 0), "");
}
774 
/*
 * Load `nc` components of `bit_size` bits from per-lane global addresses.
 *
 * For each component a runtime loop walks the vector lanes.  For every lane
 * whose exec-mask entry is non-zero, the lane's address is turned into a
 * pointer, element `c` is read through it and inserted into the result
 * vector.  Lanes that are masked off are never written, so their contents
 * come from the uninitialised temporary.
 *
 * `addr_bit_size` is not used by this implementation.
 */
static void emit_load_global(struct lp_build_nir_context *bld_base,
                             unsigned nc,
                             unsigned bit_size,
                             unsigned addr_bit_size,
                             LLVMValueRef addr,
                             LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *res_bld = get_int_bld(bld_base, true, bit_size);

   for (unsigned c = 0; c < nc; c++) {
      struct lp_build_loop_state lanes;
      struct lp_build_if_state lane_active;

      /* Stack slot that accumulates the per-lane results. */
      LLVMValueRef res_slot = lp_build_alloca(gallivm, res_bld->vec_type, "");
      LLVMValueRef exec_mask = mask_vec(bld_base);

      lp_build_loop_begin(&lanes, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef active = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      active = LLVMBuildExtractElement(builder, active, lanes.counter, "");
      lp_build_if(&lane_active, gallivm, active);

      LLVMValueRef lane_ptr = LLVMBuildExtractElement(builder, addr, lanes.counter, "");
      lane_ptr = global_addr_to_ptr(gallivm, lane_ptr, bit_size);

      LLVMValueRef loaded = lp_build_pointer_get(builder, lane_ptr, lp_build_const_int32(gallivm, c));

      LLVMValueRef merged = LLVMBuildLoad(builder, res_slot, "");
      merged = LLVMBuildInsertElement(builder, merged, loaded, lanes.counter, "");
      LLVMBuildStore(builder, merged, res_slot);

      lp_build_endif(&lane_active);
      lp_build_loop_end_cond(&lanes, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);

      outval[c] = LLVMBuildLoad(builder, res_slot, "");
   }
}
816 
/*
 * Store the components of `dst` enabled in `writemask` to per-lane global
 * addresses.
 *
 * For each enabled component a runtime loop walks the vector lanes.  The
 * lane's value is bitcast to an integer of `bit_size` bits (8/16/32/64; left
 * untouched for any other size), the lane's address is turned into a pointer
 * and, when the lane's exec-mask entry is non-zero, the value is written to
 * element `c` behind that pointer.
 *
 * `addr_bit_size` is not used by this implementation.
 */
static void emit_store_global(struct lp_build_nir_context *bld_base,
                              unsigned writemask,
                              unsigned nc, unsigned bit_size,
                              unsigned addr_bit_size,
                              LLVMValueRef addr,
                              LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   for (unsigned c = 0; c < nc; c++) {
      if ((writemask & (1u << c)) == 0)
         continue;

      LLVMValueRef comp = dst;
      if (nc != 1)
         comp = LLVMBuildExtractValue(builder, dst, c, "");

      LLVMValueRef exec_mask = mask_vec(bld_base);

      struct lp_build_loop_state lanes;
      lp_build_loop_begin(&lanes, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef lane_val = LLVMBuildExtractElement(builder, comp, lanes.counter, "");
      LLVMValueRef lane_ptr = LLVMBuildExtractElement(builder, addr, lanes.counter, "");
      lane_ptr = global_addr_to_ptr(gallivm, lane_ptr, bit_size);

      /* Pick the integer type matching the store width, if it is a known one. */
      LLVMTypeRef store_type = NULL;
      if (bit_size == 8)
         store_type = LLVMInt8TypeInContext(gallivm->context);
      else if (bit_size == 16)
         store_type = LLVMInt16TypeInContext(gallivm->context);
      else if (bit_size == 32)
         store_type = LLVMInt32TypeInContext(gallivm->context);
      else if (bit_size == 64)
         store_type = LLVMInt64TypeInContext(gallivm->context);

      if (store_type)
         lane_val = LLVMBuildBitCast(builder, lane_val, store_type, "");

      struct lp_build_if_state lane_active;
      LLVMValueRef active = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      active = LLVMBuildExtractElement(builder, active, lanes.counter, "");

      lp_build_if(&lane_active, gallivm, active);
      lp_build_pointer_set(builder, lane_ptr, lp_build_const_int32(gallivm, c), lane_val);
      lp_build_endif(&lane_active);

      lp_build_loop_end_cond(&lanes, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
   }
}
869 
/*
 * Emit a 32-bit atomic operation on per-lane global addresses.
 *
 * A runtime loop walks the vector lanes.  For each lane whose exec-mask
 * entry is non-zero the atomic selected by `nir_op` is performed on the
 * lane's address (always treated as a pointer to a 32-bit integer) and the
 * returned old value is placed in that lane of *result.  Masked-off lanes
 * get 0.
 *
 * For nir_intrinsic_global_atomic_comp_swap, `val` is passed as the compare
 * operand and `val2` as the replacement operand of the cmpxchg; for every
 * other op `val` is the RMW operand and `val2` is unused.
 *
 * `addr_bit_size` is not used by this implementation.
 */
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_op nir_op,
                               unsigned addr_bit_size,
                               LLVMValueRef addr,
                               LLVMValueRef val, LLVMValueRef val2,
                               LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           uint_bld->vec_type, "");
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                    loop_state.counter, "");
   /*
    * The atomic operates on a 32-bit integer pointer, so the operand must be
    * a 32-bit integer as well.  The compare-swap operand below and the
    * SSBO/shared atomic path already perform this cast.
    */
   value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");

   LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                   loop_state.counter, "");
   addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
   struct lp_build_if_state ifthen;
   LLVMValueRef cond, temp_res;
   LLVMValueRef scalar;
   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                         loop_state.counter, "");
      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
      scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
                                      cas_src_ptr,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      false);
      /* cmpxchg yields { old value, success flag }; keep the old value. */
      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
   } else {
      LLVMAtomicRMWBinOp op;
      switch (nir_op) {
      case nir_intrinsic_global_atomic_add:
         op = LLVMAtomicRMWBinOpAdd;
         break;
      case nir_intrinsic_global_atomic_exchange:
         op = LLVMAtomicRMWBinOpXchg;
         break;
      case nir_intrinsic_global_atomic_and:
         op = LLVMAtomicRMWBinOpAnd;
         break;
      case nir_intrinsic_global_atomic_or:
         op = LLVMAtomicRMWBinOpOr;
         break;
      case nir_intrinsic_global_atomic_xor:
         op = LLVMAtomicRMWBinOpXor;
         break;
      case nir_intrinsic_global_atomic_umin:
         op = LLVMAtomicRMWBinOpUMin;
         break;
      case nir_intrinsic_global_atomic_umax:
         op = LLVMAtomicRMWBinOpUMax;
         break;
      case nir_intrinsic_global_atomic_imin:
         op = LLVMAtomicRMWBinOpMin;
         break;
      case nir_intrinsic_global_atomic_imax:
         op = LLVMAtomicRMWBinOpMax;
         break;
      default:
         unreachable("unknown atomic op");
      }

      scalar = LLVMBuildAtomicRMW(builder, op,
                                  addr_ptr, value_ptr,
                                  LLVMAtomicOrderingSequentiallyConsistent,
                                  false);
   }
   /* Active lane: record the value returned by the atomic. */
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_else(&ifthen);
   /* Inactive lane: report 0. */
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);
   *result = LLVMBuildLoad(builder, atom_res, "");
}
961 
/*
 * Load `nc` consecutive components from the constant buffer selected by
 * `index`.
 *
 * `offset` is a vector of byte offsets; it is converted to an element index
 * by shifting right by log2(bit_size / 8).  64-bit loads go through the
 * double build context and a correspondingly cast buffer pointer; all other
 * sizes use the base build context.
 *
 * Uniform offsets: lane 0 of the offset is used, each scalar is read once
 * and broadcast to all lanes.
 *
 * Non-uniform offsets: each component is gathered, with lanes whose index is
 * >= the buffer's entry in const_sizes_ptr flagged in an overflow mask that
 * is handed to build_gather().
 */
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          bool offset_is_uniform,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const bool is_64bit = bit_size == 64;
   struct lp_build_context *bld_broad = is_64bit ? &bld_base->dbl_bld : &bld_base->base;
   LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
   unsigned size_shift = bit_size_to_shift_size(bit_size);

   /* Byte offset -> element index. */
   if (size_shift) {
      LLVMValueRef shift = lp_build_const_int_vec(gallivm, uint_bld->type, size_shift);
      offset = lp_build_shr(uint_bld, offset, shift);
   }

   if (is_64bit) {
      LLVMTypeRef dptr_type = LLVMPointerType(bld_base->dbl_bld.elem_type, 0);
      consts_ptr = LLVMBuildBitCast(builder, consts_ptr, dptr_type, "");
   }

   if (!offset_is_uniform) {
      LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef comp_index = lp_build_add(uint_bld, offset,
                                                lp_build_const_int_vec(gallivm, uint_bld->type, c));
         LLVMValueRef overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                                       comp_index, num_consts);
         result[c] = build_gather(bld_base, bld_broad, consts_ptr, comp_index, overflow_mask, NULL);
      }
      return;
   }

   /* Uniform case: every lane reads the same element, so use lane 0. */
   offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");

   for (unsigned c = 0; c < nc; c++) {
      LLVMValueRef comp_index = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
      LLVMValueRef scalar = lp_build_pointer_get(builder, consts_ptr, comp_index);
      result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
   }
}
1006 
1007 
/*
 * Load `nc` consecutive components of `bit_size` bits from an SSBO (when
 * `index` is non-NULL) or from shared memory (when `index` is NULL).
 *
 * `offset` is a vector of byte offsets and is converted to an element index
 * by shifting right by log2(bit_size / 8).  The shift is logical: the offset
 * is compared and added as an unsigned quantity below, and emit_store_mem
 * computes its element index the same way, so loads and stores agree on the
 * addressed element even for offsets with the top bit set.
 *
 * For SSBOs the buffer is selected by lane 0 of `index`, and lanes whose
 * element index is not below the buffer size (in elements) are masked off.
 * Lanes that are masked off, by the exec mask or by the bounds check,
 * return 0.
 */
static void emit_load_mem(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef ssbo_ptr = NULL;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef ssbo_limit = NULL;
   struct lp_build_context *load_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);

   load_bld = get_int_bld(bld_base, true, bit_size);

   if (index) {
      /* The buffer index is taken from lane 0 for all lanes. */
      LLVMValueRef buf_index = LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "");
      LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, buf_index);

      /* Buffer size in bytes -> size in elements of the load width. */
      ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);

      ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, buf_index);
   } else
      ssbo_ptr = bld->shared_ptr;

   /* Byte offset -> element index, shifted as an unsigned value. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
   for (unsigned c = 0; c < nc; c++) {
      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
      LLVMValueRef exec_mask = mask_vec(bld_base);

      if (ssbo_limit) {
         /* Drop lanes that would read past the end of the buffer. */
         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
      }

      LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      struct lp_build_if_state ifthen;
      LLVMValueRef cond, temp_res;

      /* From here on loop_index is the scalar element index of this lane. */
      loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                           loop_state.counter, "");

      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");

      lp_build_if(&ifthen, gallivm, cond);
      LLVMValueRef scalar;
      if (bit_size != 32) {
         /* Reinterpret the buffer as an array of the load's element type. */
         LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
         scalar = lp_build_pointer_get(builder, ssbo_ptr2, loop_index);
      } else
         scalar = lp_build_pointer_get(builder, ssbo_ptr, loop_index);

      temp_res = LLVMBuildLoad(builder, result, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_else(&ifthen);
      /* Inactive or out-of-bounds lane: return a zero of the right width. */
      temp_res = LLVMBuildLoad(builder, result, "");
      LLVMValueRef zero;
      if (bit_size == 64)
         zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
      else if (bit_size == 16)
         zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
      else if (bit_size == 8)
         zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
      else
         zero = lp_build_const_int32(gallivm, 0);
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                                NULL, LLVMIntUGE);
      outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
   }
}
1088 
/*
 * Store the components of `dst` enabled in `writemask` to an SSBO (when
 * `index` is non-NULL) or to shared memory (when `index` is NULL).
 *
 * `offset` is a vector of byte offsets; it is converted to an element index
 * by shifting right by log2(bit_size / 8).  For SSBOs the buffer is selected
 * by lane 0 of `index`, and lanes whose element index is not below the
 * buffer size (in elements) are masked off.  A runtime loop then walks the
 * lanes and writes the value of every lane that is still active.
 */
static void emit_store_mem(struct lp_build_nir_context *bld_base,
                           unsigned writemask,
                           unsigned nc,
                           unsigned bit_size,
                           LLVMValueRef index,
                           LLVMValueRef offset,
                           LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   struct lp_build_context *store_bld = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef ssbo_limit = NULL;
   LLVMValueRef ssbo_ptr;

   if (!index) {
      ssbo_ptr = bld->shared_ptr;
   } else {
      LLVMValueRef size_index = LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "");
      LLVMValueRef ssbo_size = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, size_index);

      /* Buffer size in bytes -> size in elements of the store width. */
      ssbo_limit = LLVMBuildAShr(builder, ssbo_size, lp_build_const_int32(gallivm, shift_val), "");
      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);

      LLVMValueRef ptr_index = LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "");
      ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, ptr_index);
   }

   /* Byte offset -> element index. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);

   for (unsigned c = 0; c < nc; c++) {
      if ((writemask & (1u << c)) == 0)
         continue;

      LLVMValueRef elem_index = lp_build_add(uint_bld, offset,
                                             lp_build_const_int_vec(gallivm, uint_bld->type, c));
      LLVMValueRef comp = dst;
      if (nc != 1)
         comp = LLVMBuildExtractValue(builder, dst, c, "");

      LLVMValueRef exec_mask = mask_vec(bld_base);
      if (ssbo_limit) {
         /* Drop lanes that would write past the end of the buffer. */
         LLVMValueRef in_bounds = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, elem_index, ssbo_limit);
         exec_mask = LLVMBuildAnd(builder, exec_mask, in_bounds, "");
      }

      struct lp_build_loop_state lanes;
      struct lp_build_if_state lane_active;

      lp_build_loop_begin(&lanes, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef lane_val = LLVMBuildExtractElement(builder, comp, lanes.counter, "");
      lane_val = LLVMBuildBitCast(builder, lane_val, store_bld->elem_type, "");

      /* From here on elem_index is the scalar element index of this lane. */
      elem_index = LLVMBuildExtractElement(builder, elem_index, lanes.counter, "");

      LLVMValueRef active = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      active = LLVMBuildExtractElement(builder, active, lanes.counter, "");

      lp_build_if(&lane_active, gallivm, active);
      if (bit_size == 32) {
         lp_build_pointer_set(builder, ssbo_ptr, elem_index, lane_val);
      } else {
         /* Reinterpret the buffer as an array of the store's element type. */
         LLVMValueRef typed_ptr = LLVMBuildBitCast(builder, ssbo_ptr,
                                                   LLVMPointerType(store_bld->elem_type, 0), "");
         lp_build_pointer_set(builder, typed_ptr, elem_index, lane_val);
      }
      lp_build_endif(&lane_active);

      lp_build_loop_end_cond(&lanes, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
   }
}
1151 
/*
 * Emit an atomic operation for every lane, either on an SSBO (index is
 * non-NULL) or through bld->shared_ptr (index is NULL).
 *
 * The offset vector is shifted right by 2 before being used as a GEP index.
 * For SSBOs the buffer size (also shifted right by 2) is used as an upper
 * limit: lanes whose shifted offset is not below it are masked off together
 * with the lanes disabled in the execution mask.
 *
 * A scalar loop walks the lanes.  Enabled lanes issue the atomic and store
 * the value it returns; disabled lanes store 0.
 *
 * For the compare-and-swap intrinsics val is the comparison value and val2
 * the replacement; for every other intrinsic only val is used.
 * *result receives the vector of per-lane results.
 */
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_op nir_op,
                            LLVMValueRef index, LLVMValueRef offset,
                            LLVMValueRef val, LLVMValueRef val2,
                            LLVMValueRef *result)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const bool is_cmpxchg = nir_op == nir_intrinsic_ssbo_atomic_comp_swap ||
                           nir_op == nir_intrinsic_shared_atomic_comp_swap;
   LLVMValueRef base_ptr;
   LLVMValueRef limit_vec = NULL;

   if (!index) {
      base_ptr = soa->shared_ptr;
   } else {
      /* The buffer slot is taken from lane 0 of the index vector. */
      LLVMValueRef size_slot = LLVMBuildExtractElement(builder, index,
                                                       lp_build_const_int32(gallivm, 0), "");
      LLVMValueRef buf_size = lp_build_array_get(gallivm, soa->ssbo_sizes_ptr, size_slot);

      limit_vec = LLVMBuildAShr(builder, buf_size, lp_build_const_int32(gallivm, 2), "");
      limit_vec = lp_build_broadcast_scalar(uint_bld, limit_vec);

      LLVMValueRef buf_slot = LLVMBuildExtractElement(builder, index,
                                                      lp_build_const_int32(gallivm, 0), "");
      base_ptr = lp_build_array_get(gallivm, soa->ssbo_ptr, buf_slot);
   }

   LLVMValueRef elem_index_vec = lp_build_shr_imm(uint_bld, offset, 2);
   LLVMValueRef res_store = lp_build_alloca(gallivm, uint_bld->vec_type, "");

   LLVMValueRef lane_mask = mask_vec(bld_base);
   if (limit_vec) {
      LLVMValueRef in_bounds = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, elem_index_vec, limit_vec);
      lane_mask = LLVMBuildAnd(builder, lane_mask, in_bounds, "");
   }

   struct lp_build_loop_state lanes;
   lp_build_loop_begin(&lanes, gallivm, lp_build_const_int32(gallivm, 0));

   /* Per-lane operand, element index and destination pointer. */
   LLVMValueRef lane_val = LLVMBuildExtractElement(builder, val, lanes.counter, "");
   lane_val = LLVMBuildBitCast(builder, lane_val, uint_bld->elem_type, "");

   LLVMValueRef lane_index = LLVMBuildExtractElement(builder, elem_index_vec,
                                                     lanes.counter, "");
   LLVMValueRef lane_ptr = LLVMBuildGEP(builder, base_ptr, &lane_index, 1, "");

   LLVMValueRef lane_active = LLVMBuildICmp(builder, LLVMIntNE, lane_mask, uint_bld->zero, "");
   lane_active = LLVMBuildExtractElement(builder, lane_active, lanes.counter, "");

   struct lp_build_if_state if_active;
   LLVMValueRef old_val;
   LLVMValueRef acc;

   lp_build_if(&if_active, gallivm, lane_active);

   if (is_cmpxchg) {
      LLVMValueRef lane_new = LLVMBuildExtractElement(builder, val2, lanes.counter, "");
      lane_new = LLVMBuildBitCast(builder, lane_new, uint_bld->elem_type, "");
      old_val = LLVMBuildAtomicCmpXchg(builder, lane_ptr, lane_val, lane_new,
                                       LLVMAtomicOrderingSequentiallyConsistent,
                                       LLVMAtomicOrderingSequentiallyConsistent,
                                       false);
      /* Element 0 of the cmpxchg result is the value that was in memory. */
      old_val = LLVMBuildExtractValue(builder, old_val, 0, "");
   } else {
      LLVMAtomicRMWBinOp rmw_op;

      switch (nir_op) {
      case nir_intrinsic_ssbo_atomic_add:
      case nir_intrinsic_shared_atomic_add:
         rmw_op = LLVMAtomicRMWBinOpAdd;
         break;
      case nir_intrinsic_ssbo_atomic_exchange:
      case nir_intrinsic_shared_atomic_exchange:
         rmw_op = LLVMAtomicRMWBinOpXchg;
         break;
      case nir_intrinsic_ssbo_atomic_and:
      case nir_intrinsic_shared_atomic_and:
         rmw_op = LLVMAtomicRMWBinOpAnd;
         break;
      case nir_intrinsic_ssbo_atomic_or:
      case nir_intrinsic_shared_atomic_or:
         rmw_op = LLVMAtomicRMWBinOpOr;
         break;
      case nir_intrinsic_ssbo_atomic_xor:
      case nir_intrinsic_shared_atomic_xor:
         rmw_op = LLVMAtomicRMWBinOpXor;
         break;
      case nir_intrinsic_ssbo_atomic_umin:
      case nir_intrinsic_shared_atomic_umin:
         rmw_op = LLVMAtomicRMWBinOpUMin;
         break;
      case nir_intrinsic_ssbo_atomic_umax:
      case nir_intrinsic_shared_atomic_umax:
         rmw_op = LLVMAtomicRMWBinOpUMax;
         break;
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_shared_atomic_imin:
         rmw_op = LLVMAtomicRMWBinOpMin;
         break;
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_shared_atomic_imax:
         rmw_op = LLVMAtomicRMWBinOpMax;
         break;
      default:
         unreachable("unknown atomic op");
      }
      old_val = LLVMBuildAtomicRMW(builder, rmw_op, lane_ptr, lane_val,
                                   LLVMAtomicOrderingSequentiallyConsistent,
                                   false);
   }

   /* Enabled lane: record the value returned by the atomic. */
   acc = LLVMBuildLoad(builder, res_store, "");
   acc = LLVMBuildInsertElement(builder, acc, old_val, lanes.counter, "");
   LLVMBuildStore(builder, acc, res_store);

   lp_build_else(&if_active);

   /* Disabled lane: record 0. */
   acc = LLVMBuildLoad(builder, res_store, "");
   acc = LLVMBuildInsertElement(builder, acc, lp_build_const_int32(gallivm, 0), lanes.counter, "");
   LLVMBuildStore(builder, acc, res_store);

   lp_build_endif(&if_active);

   lp_build_loop_end_cond(&lanes, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);
   *result = LLVMBuildLoad(builder, res_store, "");
}
1274 
emit_barrier(struct lp_build_nir_context * bld_base)1275 static void emit_barrier(struct lp_build_nir_context *bld_base)
1276 {
1277    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1278    struct gallivm_state * gallivm = bld_base->base.gallivm;
1279 
1280    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1281 
1282    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1283    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1284 }
1285 
/*
 * Return the size of an SSBO, broadcast to every lane of a uint vector.
 * The buffer slot is taken from lane 0 of the index vector and used to
 * look up the entry in ssbo_sizes_ptr.
 */
static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                       LLVMValueRef index)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef first_lane = lp_build_const_int32(gallivm, 0);
   LLVMValueRef buf_slot = LLVMBuildExtractElement(gallivm->builder, index, first_lane, "");
   LLVMValueRef buf_size = lp_build_array_get(gallivm, soa->ssbo_sizes_ptr, buf_slot);

   return lp_build_broadcast_scalar(&bld_base->uint_bld, buf_size);
}
1297 
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1298 static void emit_image_op(struct lp_build_nir_context *bld_base,
1299                           struct lp_img_params *params)
1300 {
1301    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1302    struct gallivm_state *gallivm = bld_base->base.gallivm;
1303 
1304    params->type = bld_base->base.type;
1305    params->context_ptr = bld->context_ptr;
1306    params->thread_data_ptr = bld->thread_data_ptr;
1307    params->exec_mask = mask_vec(bld_base);
1308 
1309    if (params->image_index_offset)
1310       params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1311                                                            lp_build_const_int32(gallivm, 0), "");
1312 
1313    bld->image->emit_op(bld->image,
1314                        bld->bld_base.base.gallivm,
1315                        params);
1316 
1317 }
1318 
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1319 static void emit_image_size(struct lp_build_nir_context *bld_base,
1320                             struct lp_sampler_size_query_params *params)
1321 {
1322    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1323    struct gallivm_state *gallivm = bld_base->base.gallivm;
1324 
1325    params->int_type = bld_base->int_bld.type;
1326    params->context_ptr = bld->context_ptr;
1327 
1328    if (params->texture_unit_offset)
1329       params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1330                                                             lp_build_const_int32(gallivm, 0), "");
1331    bld->image->emit_size_query(bld->image,
1332                                bld->bld_base.base.gallivm,
1333                                params);
1334 
1335 }
1336 
/*
 * Allocate storage for every output component covered by var, starting at
 * component sc of var's driver location.  Components that already have an
 * alloca are left alone.  Does nothing when there is no outputs array.
 */
static void init_var_slots(struct lp_build_nir_context *bld_base,
                           nir_variable *var, unsigned sc)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   /* Four components per attribute slot. */
   const unsigned num_comps = glsl_count_attribute_slots(var->type, false) * 4;

   if (soa->outputs == NULL)
      return;

   for (unsigned n = 0; n < num_comps; n++) {
      const unsigned comp = sc + n;
      const unsigned loc = var->data.driver_location + (comp / 4);
      const unsigned chan = comp % 4;

      if (soa->outputs[loc][chan])
         continue;

      soa->outputs[loc][chan] = lp_build_alloca(bld_base->base.gallivm,
                                                bld_base->base.vec_type, "output");
   }
}
1354 
/*
 * Handle a variable declaration.  Only shader outputs need work: their
 * output slots are allocated starting at the variable's location_frac,
 * except for the fragment-shader stencil and depth results, which start at
 * components 1 and 2 respectively.
 */
static void emit_var_decl(struct lp_build_nir_context *bld_base,
                          nir_variable *var)
{
   if (var->data.mode != nir_var_shader_out)
      return;

   unsigned start_comp = var->data.location_frac;

   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (var->data.location == FRAG_RESULT_STENCIL)
         start_comp = 1;
      else if (var->data.location == FRAG_RESULT_DEPTH)
         start_comp = 2;
   }

   init_var_slots(bld_base, var, start_comp);
}
1374 
emit_tex(struct lp_build_nir_context * bld_base,struct lp_sampler_params * params)1375 static void emit_tex(struct lp_build_nir_context *bld_base,
1376                      struct lp_sampler_params *params)
1377 {
1378    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1379    struct gallivm_state *gallivm = bld_base->base.gallivm;
1380 
1381    params->type = bld_base->base.type;
1382    params->context_ptr = bld->context_ptr;
1383    params->thread_data_ptr = bld->thread_data_ptr;
1384 
1385    if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1386       /* this is horrible but this can be dynamic */
1387       LLVMValueRef coords[5];
1388       LLVMValueRef *orig_texel_ptr;
1389       struct lp_build_context *uint_bld = &bld_base->uint_bld;
1390       LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1391                                  LLVMGetUndef(bld_base->base.vec_type),
1392                                  LLVMGetUndef(bld_base->base.vec_type),
1393                                  LLVMGetUndef(bld_base->base.vec_type) };
1394       LLVMValueRef texel[4], orig_offset;
1395       unsigned i;
1396       orig_texel_ptr = params->texel;
1397 
1398       for (i = 0; i < 5; i++) {
1399          coords[i] = params->coords[i];
1400       }
1401       orig_offset = params->texture_index_offset;
1402 
1403       for (unsigned v = 0; v < uint_bld->type.length; v++) {
1404          LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1405          LLVMValueRef new_coords[5];
1406          for (i = 0; i < 5; i++) {
1407             new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1408                                                     coords[i], idx, "");
1409          }
1410          params->coords = new_coords;
1411          params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1412                                                                 orig_offset,
1413                                                                 idx, "");
1414          params->type = lp_elem_type(bld_base->base.type);
1415 
1416          params->texel = texel;
1417          bld->sampler->emit_tex_sample(bld->sampler,
1418                                        gallivm,
1419                                        params);
1420 
1421          for (i = 0; i < 4; i++) {
1422             result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1423          }
1424       }
1425       for (i = 0; i < 4; i++) {
1426          orig_texel_ptr[i] = result[i];
1427       }
1428       return;
1429    }
1430 
1431    if (params->texture_index_offset)
1432       params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1433                                                              params->texture_index_offset,
1434                                                              lp_build_const_int32(bld_base->base.gallivm, 0), "");
1435 
1436    params->type = bld_base->base.type;
1437    bld->sampler->emit_tex_sample(bld->sampler,
1438                                  bld->bld_base.base.gallivm,
1439                                  params);
1440 }
1441 
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1442 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1443                           struct lp_sampler_size_query_params *params)
1444 {
1445    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1446 
1447    params->int_type = bld_base->int_bld.type;
1448    params->context_ptr = bld->context_ptr;
1449 
1450    if (params->texture_unit_offset)
1451       params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1452                                                              params->texture_unit_offset,
1453                                                              lp_build_const_int32(bld_base->base.gallivm, 0), "");
1454    bld->sampler->emit_size_query(bld->sampler,
1455                                  bld->bld_base.base.gallivm,
1456                                  params);
1457 }
1458 
/*
 * Load a system value into result[].
 *
 * Depending on the intrinsic the stored system value is used as is,
 * broadcast from a scalar, or extracted component by component from a
 * vector or aggregate.  The work-group id and work-group count are
 * zero-extended to 64 bits when the destination is 64 bits wide.
 * Intrinsics not listed here leave result[] untouched.
 */
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *base_bld = &bld_base->base;
   struct lp_build_context *dest_bld = get_int_bld(bld_base, true, instr->dest.ssa.bit_size);

   switch (instr->intrinsic) {
   case nir_intrinsic_load_instance_id:
      result[0] = lp_build_broadcast_scalar(uint_bld, soa->system_values.instance_id);
      break;
   case nir_intrinsic_load_base_instance:
      result[0] = lp_build_broadcast_scalar(uint_bld, soa->system_values.base_instance);
      break;
   case nir_intrinsic_load_base_vertex:
      result[0] = soa->system_values.basevertex;
      break;
   case nir_intrinsic_load_vertex_id:
      result[0] = soa->system_values.vertex_id;
      break;
   case nir_intrinsic_load_primitive_id:
      result[0] = soa->system_values.prim_id;
      break;
   case nir_intrinsic_load_work_group_id:
   case nir_intrinsic_load_num_work_groups: {
      /* Both are three-component vectors handled the same way. */
      LLVMValueRef src_vec = instr->intrinsic == nir_intrinsic_load_work_group_id
                                ? soa->system_values.block_id
                                : soa->system_values.grid_size;
      const bool widen = instr->dest.ssa.bit_size == 64;

      for (unsigned c = 0; c < 3; c++) {
         LLVMValueRef comp = LLVMBuildExtractElement(builder, src_vec,
                                                     lp_build_const_int32(gallivm, c), "");
         if (widen)
            comp = LLVMBuildZExt(builder, comp, bld_base->uint64_bld.elem_type, "");
         result[c] = lp_build_broadcast_scalar(dest_bld, comp);
      }
      break;
   }
   case nir_intrinsic_load_local_invocation_id:
      for (unsigned c = 0; c < 3; c++)
         result[c] = LLVMBuildExtractValue(builder, soa->system_values.thread_id, c, "");
      break;
   case nir_intrinsic_load_invocation_id:
      /* Used directly for tessellation control shaders, broadcast otherwise. */
      if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
         result[0] = soa->system_values.invocation_id;
      else
         result[0] = lp_build_broadcast_scalar(uint_bld, soa->system_values.invocation_id);
      break;
   case nir_intrinsic_load_front_face:
      result[0] = lp_build_broadcast_scalar(uint_bld, soa->system_values.front_facing);
      break;
   case nir_intrinsic_load_draw_id:
      result[0] = lp_build_broadcast_scalar(uint_bld, soa->system_values.draw_id);
      break;
   case nir_intrinsic_load_local_group_size:
      for (unsigned c = 0; c < 3; c++) {
         LLVMValueRef comp = LLVMBuildExtractElement(builder, soa->system_values.block_size,
                                                     lp_build_const_int32(gallivm, c), "");
         result[c] = lp_build_broadcast_scalar(uint_bld, comp);
      }
      break;
   case nir_intrinsic_load_work_dim:
      result[0] = lp_build_broadcast_scalar(uint_bld, soa->system_values.work_dim);
      break;
   case nir_intrinsic_load_tess_coord:
      for (unsigned c = 0; c < 3; c++)
         result[c] = LLVMBuildExtractValue(builder, soa->system_values.tess_coord, c, "");
      break;
   case nir_intrinsic_load_tess_level_outer:
      for (unsigned c = 0; c < 4; c++) {
         LLVMValueRef level = LLVMBuildExtractValue(builder, soa->system_values.tess_outer, c, "");
         result[c] = lp_build_broadcast_scalar(base_bld, level);
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      for (unsigned c = 0; c < 2; c++) {
         LLVMValueRef level = LLVMBuildExtractValue(builder, soa->system_values.tess_inner, c, "");
         result[c] = lp_build_broadcast_scalar(base_bld, level);
      }
      break;
   case nir_intrinsic_load_patch_vertices_in:
      result[0] = soa->system_values.vertices_in;
      break;
   case nir_intrinsic_load_sample_id:
      result[0] = lp_build_broadcast_scalar(uint_bld, soa->system_values.sample_id);
      break;
   case nir_intrinsic_load_sample_pos:
      /* sample_pos holds two consecutive entries per sample. */
      for (unsigned c = 0; c < 2; c++) {
         LLVMValueRef pos_idx = LLVMBuildMul(builder, soa->system_values.sample_id,
                                             lp_build_const_int32(gallivm, 2), "");
         pos_idx = LLVMBuildAdd(builder, pos_idx, lp_build_const_int32(gallivm, c), "");
         LLVMValueRef pos = lp_build_array_get(gallivm, soa->system_values.sample_pos, pos_idx);
         result[c] = lp_build_broadcast_scalar(base_bld, pos);
      }
      break;
   case nir_intrinsic_load_sample_mask_in:
      result[0] = soa->system_values.sample_mask_in;
      break;
   default:
      break;
   }
}
1559 
/*
 * Store in *dst a per-lane comparison result that is true for every lane
 * whose current mask value differs from all-ones (-1).
 */
static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
                                   LLVMValueRef *dst)
{
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef cur_mask = mask_vec(bld_base);
   LLVMValueRef all_ones = lp_build_const_int_vec(bld_base->base.gallivm, uint_bld->type, -1);

   *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, cur_mask, all_ones);
}
1567 
bgnloop(struct lp_build_nir_context * bld_base)1568 static void bgnloop(struct lp_build_nir_context *bld_base)
1569 {
1570    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1571    lp_exec_bgnloop(&bld->exec_mask, true);
1572 }
1573 
endloop(struct lp_build_nir_context * bld_base)1574 static void endloop(struct lp_build_nir_context *bld_base)
1575 {
1576    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1577    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1578 }
1579 
/*
 * Open an if: the condition is bitcast to the integer vector type and
 * pushed onto the execution mask's condition stack.
 */
static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   LLVMValueRef cond_mask = LLVMBuildBitCast(bld_base->base.gallivm->builder, cond,
                                             bld_base->base.int_vec_type, "");

   lp_exec_mask_cond_push(&soa->exec_mask, cond_mask);
}
1586 
else_stmt(struct lp_build_nir_context * bld_base)1587 static void else_stmt(struct lp_build_nir_context *bld_base)
1588 {
1589    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1590    lp_exec_mask_cond_invert(&bld->exec_mask);
1591 }
1592 
endif_stmt(struct lp_build_nir_context * bld_base)1593 static void endif_stmt(struct lp_build_nir_context *bld_base)
1594 {
1595    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1596    lp_exec_mask_cond_pop(&bld->exec_mask);
1597 }
1598 
break_stmt(struct lp_build_nir_context * bld_base)1599 static void break_stmt(struct lp_build_nir_context *bld_base)
1600 {
1601    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1602 
1603    lp_exec_break(&bld->exec_mask, NULL, false);
1604 }
1605 
continue_stmt(struct lp_build_nir_context * bld_base)1606 static void continue_stmt(struct lp_build_nir_context *bld_base)
1607 {
1608    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1609    lp_exec_continue(&bld->exec_mask);
1610 }
1611 
/*
 * Emit a discard, optionally conditional.
 *
 * The value handed to lp_build_mask_update() is built as follows:
 *  - with a condition: ~cond, OR-ed with ~exec_mask when an execution mask
 *    is active;
 *  - without a condition: ~exec_mask when an execution mask is active,
 *    otherwise all zeros.
 */
static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = soa->bld_base.base.gallivm->builder;
   LLVMValueRef new_mask;

   if (cond) {
      new_mask = LLVMBuildNot(builder, cond, "");
      if (soa->exec_mask.has_mask) {
         LLVMValueRef not_exec = LLVMBuildNot(builder, soa->exec_mask.exec_mask, "kilp");
         new_mask = LLVMBuildOr(builder, new_mask, not_exec, "");
      }
   } else if (soa->exec_mask.has_mask) {
      new_mask = LLVMBuildNot(builder, soa->exec_mask.exec_mask, "kilp");
   } else {
      new_mask = LLVMConstNull(soa->bld_base.base.int_vec_type);
   }

   lp_build_mask_update(soa->mask, new_mask);
}
1634 
1635 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1636 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1637                           LLVMValueRef ptr,
1638                           LLVMValueRef mask)
1639 {
1640    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1641    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1642 
1643    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1644 
1645    LLVMBuildStore(builder, current_vec, ptr);
1646 }
1647 
1648 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)1649 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1650                              LLVMValueRef ptr,
1651                              LLVMValueRef mask)
1652 {
1653    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1654    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1655 
1656    current_vec = lp_build_select(&bld_base->uint_bld,
1657                                  mask,
1658                                  bld_base->uint_bld.zero,
1659                                  current_vec);
1660 
1661    LLVMBuildStore(builder, current_vec, ptr);
1662 }
1663 
1664 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)1665 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1666                                   LLVMValueRef current_mask_vec,
1667                                   LLVMValueRef total_emitted_vertices_vec)
1668 {
1669    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1670    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1671    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1672                                             total_emitted_vertices_vec,
1673                                             bld->max_output_vertices_vec);
1674 
1675    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1676 }
1677 
/*
 * Emit a geometry-shader vertex on the given stream.
 *
 * Streams at or beyond gs_vertex_streams are ignored.  The current mask is
 * clamped against the maximum output vertex count, the vertex is handed to
 * the GS interface, and both the per-primitive and the total emitted-vertex
 * counters of the stream are then advanced by that mask.
 */
static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = soa->bld_base.base.gallivm;

   if (stream_id >= soa->gs_vertex_streams)
      return;

   assert(soa->gs_iface->emit_vertex);

   LLVMValueRef total_emitted =
      LLVMBuildLoad(gallivm->builder, soa->total_emitted_vertices_vec_ptr[stream_id], "");
   LLVMValueRef emit_mask =
      clamp_mask_to_max_output_vertices(soa, mask_vec(bld_base), total_emitted);
   LLVMValueRef stream_vec =
      lp_build_const_int_vec(gallivm, soa->bld_base.base.type, stream_id);

   soa->gs_iface->emit_vertex(soa->gs_iface, &soa->bld_base.base,
                              soa->outputs,
                              total_emitted,
                              emit_mask,
                              stream_vec);

   increment_vec_ptr_by_mask(bld_base, soa->emitted_vertices_vec_ptr[stream_id],
                             emit_mask);
   increment_vec_ptr_by_mask(bld_base, soa->total_emitted_vertices_vec_ptr[stream_id],
                             emit_mask);
}
1702 
1703 static void
end_primitive_masked(struct lp_build_nir_context * bld_base,LLVMValueRef mask,uint32_t stream_id)1704 end_primitive_masked(struct lp_build_nir_context * bld_base,
1705                      LLVMValueRef mask, uint32_t stream_id)
1706 {
1707    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1708    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1709 
1710    if (stream_id >= bld->gs_vertex_streams)
1711       return;
1712    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1713    LLVMValueRef emitted_vertices_vec =
1714       LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
1715    LLVMValueRef emitted_prims_vec =
1716       LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
1717    LLVMValueRef total_emitted_vertices_vec =
1718       LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
1719 
1720    LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
1721                                             PIPE_FUNC_NOTEQUAL,
1722                                             emitted_vertices_vec,
1723                                             uint_bld->zero);
1724    mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
1725    bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
1726 				total_emitted_vertices_vec,
1727 				emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
1728    increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
1729                              mask);
1730    clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
1731                                 mask);
1732 }
1733 
/*
 * End the current geometry-shader primitive on the given stream for the
 * lanes enabled in the current mask.
 */
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   ASSERTED struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;

   assert(soa->gs_iface->end_primitive);

   end_primitive_masked(bld_base, mask_vec(bld_base), stream_id);
}
1743 
1744 static void
emit_prologue(struct lp_build_nir_soa_context * bld)1745 emit_prologue(struct lp_build_nir_soa_context *bld)
1746 {
1747    struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
1748    if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
1749       uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
1750       unsigned index, chan;
1751       LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1752       LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
1753       bld->inputs_array = lp_build_array_alloca(gallivm,
1754                                                vec_type, array_size,
1755                                                "input_array");
1756 
1757       for (index = 0; index < num_inputs; ++index) {
1758          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1759             LLVMValueRef lindex =
1760                lp_build_const_int32(gallivm, index * 4 + chan);
1761             LLVMValueRef input_ptr =
1762                LLVMBuildGEP(gallivm->builder, bld->inputs_array,
1763                             &lindex, 1, "");
1764             LLVMValueRef value = bld->inputs[index][chan];
1765             if (value)
1766                LLVMBuildStore(gallivm->builder, value, input_ptr);
1767          }
1768       }
1769    }
1770 }
1771 
/*
 * Emit a subgroup vote (vote_any, vote_all or vote_ieq).
 *
 * The result is accumulated in a scalar by a loop over all lanes that only
 * considers lanes whose execution mask is non-zero, and is then broadcast
 * to every lane of result[0]:
 *  - vote_any: starts at 0 and ORs in each active lane's value;
 *  - vote_all: starts at -1 and ANDs in each active lane's value;
 *  - vote_ieq: a reference value is first picked from an active lane, then
 *    the accumulator starts at -1 and is ANDed with the (sign-extended)
 *    comparison of each active lane's value against that reference, so it
 *    stays all-ones only if every active lane matches.
 */
static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;

   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef init_val = NULL;
   if (instr->intrinsic == nir_intrinsic_vote_ieq) {
      /*
       * For equal we unfortunately have to loop to find a reference value
       * from an active lane.  Every active lane overwrites the slot, so the
       * last active lane's value is the one that ends up being used.
       */
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

      struct lp_build_if_state ifthen;
      lp_build_if(&ifthen, gallivm, if_cond);
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                       loop_state.counter, "");
      LLVMBuildStore(builder, value_ptr, res_store);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);
      init_val = LLVMBuildLoad(builder, res_store, "");

      /*
       * res_store was only borrowed to carry the reference value.  Re-seed it
       * with "all equal so far" before accumulating; otherwise the reference
       * value itself would leak into the result.
       */
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
   } else {
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
   }

   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond;
   if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

   lp_build_if(&ifthen, gallivm, if_cond);
   res = LLVMBuildLoad(builder, res_store, "");

   if (instr->intrinsic == nir_intrinsic_vote_ieq) {
      LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      /* A single mismatching lane must clear the result, hence AND. */
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_any)
      res = LLVMBuildOr(builder, res, value_ptr, "");
   else
      res = LLVMBuildAnd(builder, res, value_ptr, "");
   LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
}
1827 
1828 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])1829 emit_interp_at(struct lp_build_nir_context *bld_base,
1830                unsigned num_components,
1831                nir_variable *var,
1832                bool centroid,
1833                bool sample,
1834                unsigned const_index,
1835                LLVMValueRef indir_index,
1836                LLVMValueRef offsets[2],
1837                LLVMValueRef dst[4])
1838 {
1839    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1840 
1841    for (unsigned i = 0; i < num_components; i++) {
1842       dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
1843                                         const_index + var->data.driver_location, i + var->data.location_frac,
1844                                         centroid, sample, indir_index, offsets);
1845    }
1846 }
1847 
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)1848 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
1849                                                struct lp_type type,
1850                                                unsigned scratch_size)
1851 {
1852    LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
1853    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
1854    unsigned i;
1855 
1856    if (type.length == 1)
1857       return LLVMConstInt(elem_type, 0, 0);
1858 
1859    for (i = 0; i < type.length; ++i)
1860       elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
1861 
1862    return LLVMConstVector(elems, type.length);
1863 }
1864 
1865 static void
emit_load_scratch(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1866 emit_load_scratch(struct lp_build_nir_context *bld_base,
1867                   unsigned nc, unsigned bit_size,
1868                   LLVMValueRef offset,
1869                   LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1870 {
1871    struct gallivm_state * gallivm = bld_base->base.gallivm;
1872    LLVMBuilderRef builder = gallivm->builder;
1873    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1874    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1875    struct lp_build_context *load_bld;
1876    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
1877    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1878 
1879    load_bld = get_int_bld(bld_base, true, bit_size);
1880 
1881    offset = lp_build_add(uint_bld, offset, thread_offsets);
1882    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1883    for (unsigned c = 0; c < nc; c++) {
1884       LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1885       LLVMValueRef exec_mask = mask_vec(bld_base);
1886 
1887       LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
1888       struct lp_build_loop_state loop_state;
1889       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1890 
1891       struct lp_build_if_state ifthen;
1892       LLVMValueRef cond, temp_res;
1893 
1894       loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
1895                                            loop_state.counter, "");
1896       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1897       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1898 
1899       lp_build_if(&ifthen, gallivm, cond);
1900       LLVMValueRef scalar;
1901       LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
1902       scalar = lp_build_pointer_get(builder, ptr2, loop_index);
1903 
1904       temp_res = LLVMBuildLoad(builder, result, "");
1905       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1906       LLVMBuildStore(builder, temp_res, result);
1907       lp_build_else(&ifthen);
1908       temp_res = LLVMBuildLoad(builder, result, "");
1909       LLVMValueRef zero;
1910       if (bit_size == 64)
1911          zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
1912       else if (bit_size == 16)
1913          zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
1914       else if (bit_size == 8)
1915          zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
1916       else
1917          zero = lp_build_const_int32(gallivm, 0);
1918       temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1919       LLVMBuildStore(builder, temp_res, result);
1920       lp_build_endif(&ifthen);
1921       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1922                                 NULL, LLVMIntUGE);
1923       outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
1924    }
1925 }
1926 
1927 static void
emit_store_scratch(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef dst)1928 emit_store_scratch(struct lp_build_nir_context *bld_base,
1929                    unsigned writemask, unsigned nc,
1930                    unsigned bit_size, LLVMValueRef offset,
1931                    LLVMValueRef dst)
1932 {
1933    struct gallivm_state * gallivm = bld_base->base.gallivm;
1934    LLVMBuilderRef builder = gallivm->builder;
1935    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1936    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1937    struct lp_build_context *store_bld;
1938    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
1939    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1940    store_bld = get_int_bld(bld_base, true, bit_size);
1941 
1942    LLVMValueRef exec_mask = mask_vec(bld_base);
1943    offset = lp_build_add(uint_bld, offset, thread_offsets);
1944    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1945 
1946    for (unsigned c = 0; c < nc; c++) {
1947       if (!(writemask & (1u << c)))
1948          continue;
1949       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1950       LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1951 
1952       struct lp_build_loop_state loop_state;
1953       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1954 
1955       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1956                                                        loop_state.counter, "");
1957       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1958 
1959       struct lp_build_if_state ifthen;
1960       LLVMValueRef cond;
1961 
1962       loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
1963                                                         loop_state.counter, "");
1964 
1965       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1966       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1967       lp_build_if(&ifthen, gallivm, cond);
1968 
1969       LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
1970       lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);
1971 
1972       lp_build_endif(&ifthen);
1973       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1974                              NULL, LLVMIntUGE);
1975    }
1976 }
1977 
/*
 * Entry point: emit LLVM IR for a NIR shader using the SoA build context.
 *
 * Steps, in order:
 *  - initialise the float/int/uint build contexts for params->type, plus
 *    double-width, half-width and quarter-width variants of them;
 *  - install this file's emit callbacks in the base NIR context;
 *  - copy resource pointers, interfaces and system values from params;
 *  - for geometry shaders, allocate the per-stream emitted
 *    primitive/vertex counters;
 *  - allocate scratch memory (shader->scratch_size bytes per lane) if the
 *    shader uses any;
 *  - run emit_prologue() and lp_build_nir_llvm();
 *  - for geometry shaders, end the last primitive of every stream and call
 *    the interface's gs_epilogue with the final counters.
 *
 * gallivm: state whose builder receives the generated IR.
 * shader:  NIR shader to translate.
 * params:  translation parameters (type, mask, resource pointers, stage
 *          interfaces, ...).
 * outputs: output storage recorded in the context as bld.outputs.
 */
void lp_build_nir_soa(struct gallivm_state *gallivm,
                      struct nir_shader *shader,
                      const struct lp_build_tgsi_params *params,
                      LLVMValueRef (*outputs)[4])
{
   struct lp_build_nir_soa_context bld;
   struct lp_type type = params->type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));

   /* Double-width variants. */
   {
      struct lp_type dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }

   /* Half-width variants. */
   {
      struct lp_type uint16_type = lp_uint_type(type);
      uint16_type.width /= 2;
      lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
   }
   {
      struct lp_type int16_type = lp_int_type(type);
      int16_type.width /= 2;
      lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
   }

   /* Quarter-width variants. */
   {
      struct lp_type uint8_type = lp_uint_type(type);
      uint8_type.width /= 4;
      lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
   }
   {
      struct lp_type int8_type = lp_int_type(type);
      int8_type.width /= 4;
      lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
   }

   /* Hook up the SoA implementations of the NIR emit callbacks. */
   bld.bld_base.load_var = emit_load_var;
   bld.bld_base.store_var = emit_store_var;
   bld.bld_base.load_reg = emit_load_reg;
   bld.bld_base.store_reg = emit_store_reg;
   bld.bld_base.emit_var_decl = emit_var_decl;
   bld.bld_base.load_ubo = emit_load_ubo;
   bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
   bld.bld_base.load_global = emit_load_global;
   bld.bld_base.store_global = emit_store_global;
   bld.bld_base.atomic_global = emit_atomic_global;
   bld.bld_base.tex = emit_tex;
   bld.bld_base.tex_size = emit_tex_size;
   bld.bld_base.bgnloop = bgnloop;
   bld.bld_base.endloop = endloop;
   bld.bld_base.if_cond = if_cond;
   bld.bld_base.else_stmt = else_stmt;
   bld.bld_base.endif_stmt = endif_stmt;
   bld.bld_base.break_stmt = break_stmt;
   bld.bld_base.continue_stmt = continue_stmt;
   bld.bld_base.sysval_intrin = emit_sysval_intrin;
   bld.bld_base.discard = discard;
   bld.bld_base.emit_vertex = emit_vertex;
   bld.bld_base.end_primitive = end_primitive;
   bld.bld_base.load_mem = emit_load_mem;
   bld.bld_base.store_mem = emit_store_mem;
   bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
   bld.bld_base.atomic_mem = emit_atomic_mem;
   bld.bld_base.barrier = emit_barrier;
   bld.bld_base.image_op = emit_image_op;
   bld.bld_base.image_size = emit_image_size;
   bld.bld_base.vote = emit_vote;
   bld.bld_base.helper_invocation = emit_helper_invocation;
   bld.bld_base.interp_at = emit_interp_at;
   bld.bld_base.load_scratch = emit_load_scratch;
   bld.bld_base.store_scratch = emit_store_scratch;

   /* Copy the caller-supplied resources into the context. */
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.const_sizes_ptr = params->const_sizes_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
   bld.sampler = params->sampler;

   bld.context_ptr = params->context_ptr;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.coro = params->coro;
   bld.kernel_args_ptr = params->kernel_args;

   /* Record that shader inputs are indirectly addressed when the TGSI-style
    * info flags say so. */
   bld.indirects = 0;
   if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
      bld.indirects |= nir_var_shader_in;

   bld.gs_iface = params->gs_iface;
   bld.tcs_iface = params->tcs_iface;
   bld.tes_iface = params->tes_iface;
   bld.fs_iface = params->fs_iface;
   if (bld.gs_iface) {
      struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;

      bld.gs_vertex_streams = params->gs_vertex_streams;
      bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                                           shader->info.gs.vertices_out);
      /* One set of counters per vertex stream. */
      for (int i = 0; i < params->gs_vertex_streams; i++) {
         bld.emitted_prims_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
         bld.emitted_vertices_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
         bld.total_emitted_vertices_vec_ptr[i] =
            lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
      }
   }
   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *params->system_values;

   bld.bld_base.shader = shader;

   /* Scratch is a byte array holding scratch_size bytes for every lane. */
   if (shader->scratch_size) {
      bld.scratch_ptr = lp_build_array_alloca(gallivm,
                                              LLVMInt8TypeInContext(gallivm->context),
                                              lp_build_const_int32(gallivm, shader->scratch_size * type.length),
                                              "scratch");
   }
   bld.scratch_size = shader->scratch_size;
   emit_prologue(&bld);
   lp_build_nir_llvm(&bld.bld_base, shader);

   if (bld.gs_iface) {
      LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;

      /* Close the last primitive of each stream and hand the final counters
       * to the geometry-shader interface. */
      for (int i = 0; i < params->gs_vertex_streams; i++) {
         end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);

         total_emitted_vertices_vec =
            LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");

         emitted_prims_vec =
            LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
         bld.gs_iface->gs_epilogue(bld.gs_iface,
                                   total_emitted_vertices_vec,
                                   emitted_prims_vec, i);
      }
   }
   lp_exec_mask_fini(&bld.exec_mask);
}
2153