1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_arit.h"
28 #include "lp_bld_bitarit.h"
29 #include "lp_bld_const.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_logic.h"
32 #include "lp_bld_quad.h"
33 #include "lp_bld_flow.h"
34 #include "lp_bld_struct.h"
35 #include "lp_bld_debug.h"
36 #include "lp_bld_printf.h"
37 #include "nir_deref.h"
38 
39 static void visit_cf_list(struct lp_build_nir_context *bld_base,
40                           struct exec_list *list);
41 
/**
 * Reinterpret \p val as the vector type selected by \p alu_type and
 * \p bit_size.
 *
 * Only a bitcast is emitted.  ALU types other than the unsized
 * float/int/uint and nir_type_uint32 are returned untouched.  An
 * unsupported bit size for one of the unsized types trips an assert and
 * yields NULL.
 */
static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
                              nir_alu_type alu_type, unsigned bit_size)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMTypeRef dst_type = NULL;

   switch (alu_type) {
   case nir_type_float:
      if (bit_size == 16)
         dst_type = LLVMVectorType(LLVMHalfTypeInContext(gallivm->context),
                                   bld_base->base.type.length);
      else if (bit_size == 32)
         dst_type = bld_base->base.vec_type;
      else if (bit_size == 64)
         dst_type = bld_base->dbl_bld.vec_type;
      break;
   case nir_type_int:
      if (bit_size == 8)
         dst_type = bld_base->int8_bld.vec_type;
      else if (bit_size == 16)
         dst_type = bld_base->int16_bld.vec_type;
      else if (bit_size == 32)
         dst_type = bld_base->int_bld.vec_type;
      else if (bit_size == 64)
         dst_type = bld_base->int64_bld.vec_type;
      break;
   case nir_type_uint:
      if (bit_size == 8)
         dst_type = bld_base->uint8_bld.vec_type;
      else if (bit_size == 16)
         dst_type = bld_base->uint16_bld.vec_type;
      else if (bit_size == 32)
         dst_type = bld_base->uint_bld.vec_type;
      else if (bit_size == 64)
         dst_type = bld_base->uint64_bld.vec_type;
      break;
   case nir_type_uint32:
      /* Sized type: the bit_size argument is not consulted. */
      dst_type = bld_base->uint_bld.vec_type;
      break;
   default:
      return val;
   }

   if (!dst_type) {
      /* Handled base type, but a bit size we have no vector type for. */
      assert(0);
      return NULL;
   }
   return LLVMBuildBitCast(gallivm->builder, val, dst_type, "");
}
97 
98 
get_flt_bld(struct lp_build_nir_context * bld_base,unsigned op_bit_size)99 static struct lp_build_context *get_flt_bld(struct lp_build_nir_context *bld_base,
100                                             unsigned op_bit_size)
101 {
102    if (op_bit_size == 64)
103       return &bld_base->dbl_bld;
104    else
105       return &bld_base->base;
106 }
107 
glsl_sampler_to_pipe(int sampler_dim,bool is_array)108 static unsigned glsl_sampler_to_pipe(int sampler_dim, bool is_array)
109 {
110    unsigned pipe_target = PIPE_BUFFER;
111    switch (sampler_dim) {
112    case GLSL_SAMPLER_DIM_1D:
113       pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
114       break;
115    case GLSL_SAMPLER_DIM_2D:
116    case GLSL_SAMPLER_DIM_SUBPASS:
117       pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
118       break;
119    case GLSL_SAMPLER_DIM_3D:
120       pipe_target = PIPE_TEXTURE_3D;
121       break;
122    case GLSL_SAMPLER_DIM_MS:
123    case GLSL_SAMPLER_DIM_SUBPASS_MS:
124       pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
125       break;
126    case GLSL_SAMPLER_DIM_CUBE:
127       pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
128       break;
129    case GLSL_SAMPLER_DIM_RECT:
130       pipe_target = PIPE_TEXTURE_RECT;
131       break;
132    case GLSL_SAMPLER_DIM_BUF:
133       pipe_target = PIPE_BUFFER;
134       break;
135    default:
136       break;
137    }
138    return pipe_target;
139 }
140 
get_ssa_src(struct lp_build_nir_context * bld_base,nir_ssa_def * ssa)141 static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
142 {
143    return bld_base->ssa_defs[ssa->index];
144 }
145 
146 static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src);
147 
get_reg_src(struct lp_build_nir_context * bld_base,nir_reg_src src)148 static LLVMValueRef get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
149 {
150    struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
151    LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
152    struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
153    LLVMValueRef indir_src = NULL;
154    if (src.indirect)
155       indir_src = get_src(bld_base, *src.indirect);
156    return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
157 }
158 
get_src(struct lp_build_nir_context * bld_base,nir_src src)159 static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src)
160 {
161    if (src.is_ssa)
162       return get_ssa_src(bld_base, src.ssa);
163    else
164       return get_reg_src(bld_base, src.reg);
165 }
166 
/** Record \p ptr as the LLVM value of the SSA definition with index \p idx. */
static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
{
   LLVMValueRef *slot = &bld_base->ssa_defs[idx];

   *slot = ptr;
}
171 
/**
 * Store per-component results into an SSA definition.
 *
 * A single-component definition stores vals[0] directly; wider ones are
 * first packed with lp_nir_array_build_gather_values().
 */
static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
                            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef packed;

   if (ssa->num_components == 1)
      packed = vals[0];
   else
      packed = lp_nir_array_build_gather_values(bld_base->base.gallivm->builder,
                                                vals, ssa->num_components);

   assign_ssa(bld_base, ssa->index, packed);
}
177 
/**
 * Write per-component results to a NIR register destination.
 *
 * The register's storage comes from the bld_base->regs hash table and the
 * store is delegated to the backend's store_reg hook.  A \p write_mask of
 * zero is replaced by 0xf before being handed to the hook.
 */
static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
                       unsigned write_mask,
                       LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   struct hash_entry *reg_entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
   LLVMValueRef storage = (LLVMValueRef)reg_entry->data;
   struct lp_build_context *bld = get_int_bld(bld_base, true, reg->reg->bit_size);
   LLVMValueRef indirect = reg->indirect ? get_src(bld_base, *reg->indirect) : NULL;
   const unsigned effective_mask = write_mask ? write_mask : 0xf;

   bld_base->store_reg(bld_base, bld, reg, effective_mask, indirect, storage, vals);
}
190 
/**
 * Store results into a generic NIR destination.  Register destinations are
 * written with a write mask of 0, which assign_reg() replaces by its default.
 */
static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (!dest->is_ssa) {
      assign_reg(bld_base, &dest->reg, 0, vals);
      return;
   }

   assign_ssa_dest(bld_base, &dest->ssa, vals);
}
198 
/**
 * Store results into an ALU destination.  Register destinations honour the
 * ALU destination's write mask; SSA destinations ignore it.
 */
static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (!dest->dest.is_ssa) {
      assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
      return;
   }

   assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
}
206 
/**
 * Convert an integer vector to a 32-bit boolean vector by comparing each
 * lane against zero.
 *
 * \param bld_base      NIR build context.
 * \param src_bit_size  Bit width of the lanes of \p val.
 * \param is_unsigned   Selects the unsigned or signed integer context for
 *                      the comparison.
 * \param val           Integer vector to test.
 * \return The comparison result, resized to bld_base->int_bld.vec_type.
 *
 * The comparison is built with the integer type matching \p src_bit_size,
 * so its result has to be resized for every width other than 32: narrower
 * results are sign-extended and 64-bit results are truncated, exactly as
 * icmp32() does.  Previously only the 64-bit case was resized, leaving 8-
 * and 16-bit sources with a result that was not of the 32-bit boolean type.
 */
static LLVMValueRef int_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  bool is_unsigned,
                                  LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, int_bld->type, PIPE_FUNC_NOTEQUAL, val, int_bld->zero);

   if (src_bit_size < 32)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}
219 
/**
 * Convert a float vector to a 32-bit boolean vector by comparing each lane
 * against zero with PIPE_FUNC_NOTEQUAL.  A 64-bit comparison result is
 * truncated to the 32-bit integer vector type; other widths are returned
 * as produced by lp_build_cmp().
 */
static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  LLVMValueRef val)
{
   struct lp_build_context *bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef is_nonzero = lp_build_cmp(bld, PIPE_FUNC_NOTEQUAL, val, bld->zero);

   if (src_bit_size != 64)
      return is_nonzero;

   return LLVMBuildTrunc(bld_base->base.gallivm->builder, is_nonzero,
                         bld_base->int_bld.vec_type, "");
}
231 
/**
 * Emit a floating-point comparison of src[0] against src[1] that yields a
 * 32-bit boolean vector.
 *
 * Every comparison except PIPE_FUNC_NOTEQUAL goes through
 * lp_build_cmp_ordered(); not-equal uses plain lp_build_cmp().  A 64-bit
 * comparison result is truncated to the 32-bit integer vector type.
 */
static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_context *bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef mask = (compare == PIPE_FUNC_NOTEQUAL)
      ? lp_build_cmp(bld, compare, src[0], src[1])
      : lp_build_cmp_ordered(bld, compare, src[0], src[1]);

   if (src_bit_size != 64)
      return mask;

   return LLVMBuildTrunc(bld_base->base.gallivm->builder, mask,
                         bld_base->int_bld.vec_type, "");
}
249 
/**
 * Emit an integer comparison of src[0] against src[1] that yields a 32-bit
 * boolean vector.
 *
 * The comparison is built in the integer context matching the source width
 * and signedness; results narrower than 32 bits are sign-extended and
 * 64-bit results are truncated to the 32-bit integer vector type.
 */
static LLVMValueRef icmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           bool is_unsigned,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMTypeRef bool32_type = bld_base->int_bld.vec_type;
   struct lp_build_context *cmp_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef mask = lp_build_cmp(cmp_bld, compare, src[0], src[1]);

   if (src_bit_size < 32)
      return LLVMBuildSExt(builder, mask, bool32_type, "");
   if (src_bit_size == 64)
      return LLVMBuildTrunc(builder, mask, bool32_type, "");
   return mask;
}
265 
get_alu_src(struct lp_build_nir_context * bld_base,nir_alu_src src,unsigned num_components)266 static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
267                                 nir_alu_src src,
268                                 unsigned num_components)
269 {
270    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
271    struct gallivm_state *gallivm = bld_base->base.gallivm;
272    LLVMValueRef value = get_src(bld_base, src.src);
273    bool need_swizzle = false;
274 
275    assert(value);
276    unsigned src_components = nir_src_num_components(src.src);
277    for (unsigned i = 0; i < num_components; ++i) {
278       assert(src.swizzle[i] < src_components);
279       if (src.swizzle[i] != i)
280          need_swizzle = true;
281    }
282 
283    if (need_swizzle || num_components != src_components) {
284       if (src_components > 1 && num_components == 1) {
285          value = LLVMBuildExtractValue(gallivm->builder, value,
286                                        src.swizzle[0], "");
287       } else if (src_components == 1 && num_components > 1) {
288          LLVMValueRef values[] = {value, value, value, value, value, value, value, value, value, value, value, value, value, value, value, value};
289          value = lp_nir_array_build_gather_values(builder, values, num_components);
290       } else {
291          LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
292          for (unsigned i = 0; i < num_components; i++)
293             arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
294          value = arr;
295       }
296    }
297    assert(!src.negate);
298    assert(!src.abs);
299    return value;
300 }
301 
/**
 * Convert a 32-bit boolean vector to floating point.
 *
 * The boolean bits are ANDed with the bit pattern of 1.0f and the result is
 * reinterpreted as a 32-bit float vector.  For a 64-bit destination that
 * float is then extended to double.  Only bit sizes 32 and 64 are supported.
 */
static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   LLVMValueRef bool_bits = cast_type(bld_base, src0, nir_type_int, 32);
   LLVMValueRef one_f = lp_build_const_vec(gallivm, bld_base->base.type, 1.0);
   LLVMValueRef one_bits = LLVMBuildBitCast(builder, one_f, bld_base->int_bld.vec_type, "");
   LLVMValueRef masked = LLVMBuildAnd(builder, bool_bits, one_bits, "");
   LLVMValueRef as_float = LLVMBuildBitCast(builder, masked, bld_base->base.vec_type, "");

   switch (bitsize) {
   case 32:
      return as_float;
   case 64:
      return LLVMBuildFPExt(builder, as_float, bld_base->dbl_bld.vec_type, "");
   default:
      unreachable("unsupported bit size.");
   }
   return as_float;
}
323 
/**
 * Convert a 32-bit boolean vector to an integer vector of \p bitsize bits.
 *
 * The boolean is ANDed with the integer constant 1, then truncated (8/16
 * bits), returned as is (32 bits) or zero-extended (64 bits).
 */
static LLVMValueRef emit_b2i(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   LLVMValueRef bool_bits = cast_type(bld_base, src0, nir_type_int, 32);
   LLVMValueRef one = lp_build_const_int_vec(gallivm, bld_base->base.type, 1);
   LLVMValueRef bit = LLVMBuildAnd(builder, bool_bits, one, "");
   LLVMTypeRef narrow_type = NULL;

   switch (bitsize) {
   case 8:
      narrow_type = bld_base->int8_bld.vec_type;
      break;
   case 16:
      narrow_type = bld_base->int16_bld.vec_type;
      break;
   case 32:
      return bit;
   case 64:
      return LLVMBuildZExt(builder, bit, bld_base->int64_bld.vec_type, "");
   default:
      unreachable("unsupported bit size.");
   }
   return LLVMBuildTrunc(builder, bit, narrow_type, "");
}
344 
/**
 * Emit a select driven by a 32-bit boolean: src[0] is compared against zero
 * and the resulting mask chooses between src[1] and src[2].  The select is
 * built in the signed integer context matching the bit size of src[1].
 */
static LLVMValueRef emit_b32csel(struct lp_build_nir_context *bld_base,
                               unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
                               LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_context *cond_bld = &bld_base->int_bld;
   LLVMValueRef cond_bits = cast_type(bld_base, src[0], nir_type_int, 32);
   LLVMValueRef cond_mask = lp_build_compare(bld_base->base.gallivm, cond_bld->type,
                                             PIPE_FUNC_NOTEQUAL, cond_bits, cond_bld->zero);
   struct lp_build_context *value_bld = get_int_bld(bld_base, false, src_bit_size[1]);

   return lp_build_select(value_bld, cond_mask, src[1], src[2]);
}
354 
/**
 * Extract one 32-bit half of every 64-bit lane.
 *
 * \p src is reinterpreted as a vector of twice as many 32-bit integers and
 * every other element is picked with a shuffle.  Which element of each pair
 * is the high half depends on the host endianness.
 */
static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   const unsigned lanes = bld_base->base.type.length;
   LLVMValueRef picks[LP_MAX_VECTOR_WIDTH/32];

   /* Offset within each pair of 32-bit elements of the requested half. */
#if UTIL_ARCH_LITTLE_ENDIAN
   const unsigned half_offset = hi ? 1 : 0;
#else
   const unsigned half_offset = hi ? 0 : 1;
#endif

   for (unsigned lane = 0; lane < lanes; lane++)
      picks[lane] = lp_build_const_int32(gallivm, (lane * 2) + half_offset);

   LLVMTypeRef split_type = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), lanes * 2);
   src = LLVMBuildBitCast(gallivm->builder, src, split_type, "");

   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(picks, lanes),
                                 "");
}
380 
381 static LLVMValueRef
merge_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)382 merge_64bit(struct lp_build_nir_context *bld_base,
383             LLVMValueRef input,
384             LLVMValueRef input2)
385 {
386    struct gallivm_state *gallivm = bld_base->base.gallivm;
387    LLVMBuilderRef builder = gallivm->builder;
388    int i;
389    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
390    int len = bld_base->base.type.length * 2;
391    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
392 
393    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
394 #if UTIL_ARCH_LITTLE_ENDIAN
395       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
396       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
397 #else
398       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
399       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
400 #endif
401    }
402    return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
403 }
404 
/**
 * Extract one 16-bit half of every lane.
 *
 * \p src is reinterpreted as a vector of twice bld_base->base.type.length
 * 16-bit integers and every other element is picked with a shuffle.  Which
 * element of each pair is the high half depends on the host endianness.
 */
static LLVMValueRef split_16bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   const unsigned lanes = bld_base->base.type.length;
   LLVMValueRef picks[LP_MAX_VECTOR_WIDTH/32];

   /* Offset within each pair of 16-bit elements of the requested half. */
#if UTIL_ARCH_LITTLE_ENDIAN
   const unsigned half_offset = hi ? 1 : 0;
#else
   const unsigned half_offset = hi ? 0 : 1;
#endif

   for (unsigned lane = 0; lane < lanes; lane++)
      picks[lane] = lp_build_const_int32(gallivm, (lane * 2) + half_offset);

   LLVMTypeRef split_type = LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), lanes * 2);
   src = LLVMBuildBitCast(gallivm->builder, src, split_type, "");

   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(picks, lanes),
                                 "");
}
430 static LLVMValueRef
merge_16bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)431 merge_16bit(struct lp_build_nir_context *bld_base,
432             LLVMValueRef input,
433             LLVMValueRef input2)
434 {
435    struct gallivm_state *gallivm = bld_base->base.gallivm;
436    LLVMBuilderRef builder = gallivm->builder;
437    int i;
438    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
439    int len = bld_base->int16_bld.type.length * 2;
440    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
441 
442    for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
443 #if UTIL_ARCH_LITTLE_ENDIAN
444       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
445       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
446 #else
447       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
448       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
449 #endif
450    }
451    return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
452 }
453 
454 static LLVMValueRef
do_int_divide(struct lp_build_nir_context * bld_base,bool is_unsigned,unsigned src_bit_size,LLVMValueRef src,LLVMValueRef src2)455 do_int_divide(struct lp_build_nir_context *bld_base,
456               bool is_unsigned, unsigned src_bit_size,
457               LLVMValueRef src, LLVMValueRef src2)
458 {
459    struct gallivm_state *gallivm = bld_base->base.gallivm;
460    LLVMBuilderRef builder = gallivm->builder;
461    struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
462    struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
463    LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
464                                         mask_bld->zero);
465 
466    if (!is_unsigned) {
467       /* INT_MIN (0x80000000) / -1 (0xffffffff) causes sigfpe, seen with blender. */
468       div_mask = LLVMBuildAnd(builder, div_mask, lp_build_const_int_vec(gallivm, int_bld->type, 0x7fffffff), "");
469    }
470    LLVMValueRef divisor = LLVMBuildOr(builder,
471                                       div_mask,
472                                       src2, "");
473    LLVMValueRef result = lp_build_div(int_bld, src, divisor);
474 
475    if (!is_unsigned) {
476       LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
477       return LLVMBuildAnd(builder, not_div_mask, result, "");
478    } else
479       /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10
480        * may as well do same for idiv */
481       return LLVMBuildOr(builder, div_mask, result, "");
482 }
483 
484 static LLVMValueRef
do_int_mod(struct lp_build_nir_context * bld_base,bool is_unsigned,unsigned src_bit_size,LLVMValueRef src,LLVMValueRef src2)485 do_int_mod(struct lp_build_nir_context *bld_base,
486            bool is_unsigned, unsigned src_bit_size,
487            LLVMValueRef src, LLVMValueRef src2)
488 {
489    struct gallivm_state *gallivm = bld_base->base.gallivm;
490    LLVMBuilderRef builder = gallivm->builder;
491    struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
492    LLVMValueRef div_mask = lp_build_cmp(int_bld, PIPE_FUNC_EQUAL, src2,
493                                         int_bld->zero);
494    LLVMValueRef divisor = LLVMBuildOr(builder,
495                                       div_mask,
496                                       src2, "");
497    LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
498    return LLVMBuildOr(builder, div_mask, result, "");
499 }
500 
501 static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context * bld_base,LLVMValueRef src)502 do_quantize_to_f16(struct lp_build_nir_context *bld_base,
503                    LLVMValueRef src)
504 {
505    struct gallivm_state *gallivm = bld_base->base.gallivm;
506    LLVMBuilderRef builder = gallivm->builder;
507    LLVMValueRef result, cond, cond2, temp;
508 
509    result = LLVMBuildFPTrunc(builder, src, LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
510    result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");
511 
512    temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
513    cond = LLVMBuildFCmp(builder, LLVMRealOGT,
514                         LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
515                         temp, "");
516    cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
517    cond = LLVMBuildAnd(builder, cond, cond2, "");
518    result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
519    return result;
520 }
521 
do_alu_action(struct lp_build_nir_context * bld_base,nir_op op,unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])522 static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
523                                   nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
524 {
525    struct gallivm_state *gallivm = bld_base->base.gallivm;
526    LLVMBuilderRef builder = gallivm->builder;
527    LLVMValueRef result;
528    enum gallivm_nan_behavior minmax_nan = bld_base->shader->info.stage == MESA_SHADER_KERNEL ? GALLIVM_NAN_RETURN_OTHER : GALLIVM_NAN_BEHAVIOR_UNDEFINED;
529    switch (op) {
530    case nir_op_b2f32:
531       result = emit_b2f(bld_base, src[0], 32);
532       break;
533    case nir_op_b2f64:
534       result = emit_b2f(bld_base, src[0], 64);
535       break;
536    case nir_op_b2i8:
537       result = emit_b2i(bld_base, src[0], 8);
538       break;
539    case nir_op_b2i16:
540       result = emit_b2i(bld_base, src[0], 16);
541       break;
542    case nir_op_b2i32:
543       result = emit_b2i(bld_base, src[0], 32);
544       break;
545    case nir_op_b2i64:
546       result = emit_b2i(bld_base, src[0], 64);
547       break;
548    case nir_op_b32csel:
549       result = emit_b32csel(bld_base, src_bit_size, src);
550       break;
551    case nir_op_bit_count:
552       result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
553       break;
554    case nir_op_bitfield_select:
555       result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
556       break;
557    case nir_op_bitfield_reverse:
558       result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
559       break;
560    case nir_op_f2b32:
561       result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
562       break;
563    case nir_op_f2f16:
564       if (src_bit_size[0] == 64)
565          src[0] = LLVMBuildFPTrunc(builder, src[0],
566                                    bld_base->base.vec_type, "");
567       result = LLVMBuildFPTrunc(builder, src[0],
568                                 LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
569       break;
570    case nir_op_f2f32:
571       if (src_bit_size[0] < 32)
572          result = LLVMBuildFPExt(builder, src[0],
573                                  bld_base->base.vec_type, "");
574       else
575          result = LLVMBuildFPTrunc(builder, src[0],
576                                    bld_base->base.vec_type, "");
577       break;
578    case nir_op_f2f64:
579       result = LLVMBuildFPExt(builder, src[0],
580                               bld_base->dbl_bld.vec_type, "");
581       break;
582    case nir_op_f2i32:
583       result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
584       break;
585    case nir_op_f2u32:
586       result = LLVMBuildFPToUI(builder,
587                                src[0],
588                                bld_base->base.int_vec_type, "");
589       break;
590    case nir_op_f2i64:
591       result = LLVMBuildFPToSI(builder,
592                                src[0],
593                                bld_base->int64_bld.vec_type, "");
594       break;
595    case nir_op_f2u64:
596       result = LLVMBuildFPToUI(builder,
597                                src[0],
598                                bld_base->uint64_bld.vec_type, "");
599       break;
600    case nir_op_fabs:
601       result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
602       break;
603    case nir_op_fadd:
604       result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
605                             src[0], src[1]);
606       break;
607    case nir_op_fceil:
608       result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
609       break;
610    case nir_op_fcos:
611       result = lp_build_cos(&bld_base->base, src[0]);
612       break;
613    case nir_op_fddx:
614    case nir_op_fddx_coarse:
615    case nir_op_fddx_fine:
616       result = lp_build_ddx(&bld_base->base, src[0]);
617       break;
618    case nir_op_fddy:
619    case nir_op_fddy_coarse:
620    case nir_op_fddy_fine:
621       result = lp_build_ddy(&bld_base->base, src[0]);
622       break;
623    case nir_op_fdiv:
624       result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
625                             src[0], src[1]);
626       break;
627    case nir_op_feq32:
628       result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
629       break;
630    case nir_op_fexp2:
631       result = lp_build_exp2(&bld_base->base, src[0]);
632       break;
633    case nir_op_ffloor:
634       result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
635       break;
636    case nir_op_ffma:
637       result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
638       break;
639    case nir_op_ffract: {
640       struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
641       LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
642       result = lp_build_sub(flt_bld, src[0], tmp);
643       break;
644    }
645    case nir_op_fge32:
646       result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
647       break;
648    case nir_op_find_lsb: {
649       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
650       result = lp_build_cttz(int_bld, src[0]);
651       if (src_bit_size[0] < 32)
652          result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
653       else if (src_bit_size[0] > 32)
654          result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
655       break;
656    }
657    case nir_op_flog2:
658       result = lp_build_log2_safe(&bld_base->base, src[0]);
659       break;
660    case nir_op_flt:
661    case nir_op_flt32:
662       result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
663       break;
664    case nir_op_fmin:
665       result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
666       break;
667    case nir_op_fmod: {
668       struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
669       result = lp_build_div(flt_bld, src[0], src[1]);
670       result = lp_build_floor(flt_bld, result);
671       result = lp_build_mul(flt_bld, src[1], result);
672       result = lp_build_sub(flt_bld, src[0], result);
673       break;
674    }
675    case nir_op_fmul:
676       result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
677                             src[0], src[1]);
678       break;
679    case nir_op_fmax:
680       result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
681       break;
682    case nir_op_fneu32:
683       result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
684       break;
685    case nir_op_fneg:
686       result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
687       break;
688    case nir_op_fpow:
689       result = lp_build_pow(&bld_base->base, src[0], src[1]);
690       break;
691    case nir_op_fquantize2f16:
692       result = do_quantize_to_f16(bld_base, src[0]);
693       break;
694    case nir_op_frcp:
695       result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
696       break;
697    case nir_op_fround_even:
698       result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
699       break;
700    case nir_op_frsq:
701       result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
702       break;
703    case nir_op_fsat:
704       result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
705       break;
706    case nir_op_fsign:
707       result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
708       break;
709    case nir_op_fsin:
710       result = lp_build_sin(&bld_base->base, src[0]);
711       break;
712    case nir_op_fsqrt:
713       result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
714       break;
715    case nir_op_ftrunc:
716       result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
717       break;
718    case nir_op_i2b32:
719       result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
720       break;
721    case nir_op_i2f32:
722       result = lp_build_int_to_float(&bld_base->base, src[0]);
723       break;
724    case nir_op_i2f64:
725       result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
726       break;
727    case nir_op_i2i8:
728       result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
729       break;
730    case nir_op_i2i16:
731       if (src_bit_size[0] < 16)
732          result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
733       else
734          result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
735       break;
736    case nir_op_i2i32:
737       if (src_bit_size[0] < 32)
738          result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
739       else
740          result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
741       break;
742    case nir_op_i2i64:
743       result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
744       break;
745    case nir_op_iabs:
746       result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
747       break;
748    case nir_op_iadd:
749       result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
750                             src[0], src[1]);
751       break;
752    case nir_op_iand:
753       result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
754                             src[0], src[1]);
755       break;
756    case nir_op_idiv:
757       result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
758       break;
759    case nir_op_ieq32:
760       result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
761       break;
762    case nir_op_ige32:
763       result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
764       break;
765    case nir_op_ilt32:
766       result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
767       break;
768    case nir_op_imax:
769       result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
770       break;
771    case nir_op_imin:
772       result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
773       break;
774    case nir_op_imul:
775    case nir_op_imul24:
776       result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
777                             src[0], src[1]);
778       break;
779    case nir_op_imul_high: {
780       LLVMValueRef hi_bits;
781       lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
782       result = hi_bits;
783       break;
784    }
785    case nir_op_ine32:
786       result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
787       break;
788    case nir_op_ineg:
789       result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
790       break;
791    case nir_op_inot:
792       result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
793       break;
794    case nir_op_ior:
795       result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
796                            src[0], src[1]);
797       break;
798    case nir_op_imod:
799    case nir_op_irem:
800       result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
801       break;
802    case nir_op_ishl: {
803       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
804       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
805       if (src_bit_size[0] == 64)
806          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
807       if (src_bit_size[0] < 32)
808          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
809       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
810       result = lp_build_shl(int_bld, src[0], src[1]);
811       break;
812    }
813    case nir_op_ishr: {
814       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
815       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
816       if (src_bit_size[0] == 64)
817          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
818       if (src_bit_size[0] < 32)
819          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
820       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
821       result = lp_build_shr(int_bld, src[0], src[1]);
822       break;
823    }
824    case nir_op_isign:
825       result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
826       break;
827    case nir_op_isub:
828       result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
829                             src[0], src[1]);
830       break;
831    case nir_op_ixor:
832       result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
833                             src[0], src[1]);
834       break;
835    case nir_op_mov:
836       result = src[0];
837       break;
838    case nir_op_unpack_64_2x32_split_x:
839       result = split_64bit(bld_base, src[0], false);
840       break;
841    case nir_op_unpack_64_2x32_split_y:
842       result = split_64bit(bld_base, src[0], true);
843       break;
844 
845    case nir_op_pack_32_2x16_split: {
846       LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
847       result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
848       break;
849    }
850    case nir_op_unpack_32_2x16_split_x:
851       result = split_16bit(bld_base, src[0], false);
852       break;
853    case nir_op_unpack_32_2x16_split_y:
854       result = split_16bit(bld_base, src[0], true);
855       break;
856    case nir_op_pack_64_2x32_split: {
857       LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
858       result = LLVMBuildBitCast(builder, tmp, bld_base->dbl_bld.vec_type, "");
859       break;
860    }
861    case nir_op_u2f32:
862       result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
863       break;
864    case nir_op_u2f64:
865       result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
866       break;
867    case nir_op_u2u8:
868       result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
869       break;
870    case nir_op_u2u16:
871       if (src_bit_size[0] < 16)
872          result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
873       else
874          result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
875       break;
876    case nir_op_u2u32:
877       if (src_bit_size[0] < 32)
878          result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
879       else
880          result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
881       break;
882    case nir_op_u2u64:
883       result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
884       break;
885    case nir_op_udiv:
886       result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
887       break;
888    case nir_op_ufind_msb: {
889       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
890       result = lp_build_ctlz(uint_bld, src[0]);
891       result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
892       if (src_bit_size[0] < 32)
893          result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
894       else
895          result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
896       break;
897    }
898    case nir_op_uge32:
899       result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
900       break;
901    case nir_op_ult32:
902       result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
903       break;
904    case nir_op_umax:
905       result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
906       break;
907    case nir_op_umin:
908       result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
909       break;
910    case nir_op_umod:
911       result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
912       break;
913    case nir_op_umul_high: {
914       LLVMValueRef hi_bits;
915       lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
916       result = hi_bits;
917       break;
918    }
919    case nir_op_ushr: {
920       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
921       if (src_bit_size[0] == 64)
922          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
923       if (src_bit_size[0] < 32)
924          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
925       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
926       result = lp_build_shr(uint_bld, src[0], src[1]);
927       break;
928    }
929    default:
930       assert(0);
931       break;
932    }
933    return result;
934 }
935 
visit_alu(struct lp_build_nir_context * bld_base,const nir_alu_instr * instr)936 static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr *instr)
937 {
938    struct gallivm_state *gallivm = bld_base->base.gallivm;
939    LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
940    unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
941    unsigned num_components = nir_dest_num_components(instr->dest.dest);
942    unsigned src_components;
943    switch (instr->op) {
944    case nir_op_vec2:
945    case nir_op_vec3:
946    case nir_op_vec4:
947    case nir_op_vec8:
948    case nir_op_vec16:
949       src_components = 1;
950       break;
951    case nir_op_pack_half_2x16:
952       src_components = 2;
953       break;
954    case nir_op_unpack_half_2x16:
955       src_components = 1;
956       break;
957    case nir_op_cube_face_coord:
958    case nir_op_cube_face_index:
959       src_components = 3;
960       break;
961    default:
962       src_components = num_components;
963       break;
964    }
965    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
966       src[i] = get_alu_src(bld_base, instr->src[i], src_components);
967       src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
968    }
969 
970    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
971    if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) {
972       for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
973          result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
974       }
975    } else {
976       for (unsigned c = 0; c < num_components; c++) {
977          LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];
978 
979          for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
980             if (num_components > 1) {
981                src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
982                                                      src[i], c, "");
983             } else
984                src_chan[i] = src[i];
985             src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
986          }
987          result[c] = do_alu_action(bld_base, instr->op, src_bit_size, src_chan);
988          result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest));
989       }
990    }
991    assign_alu_dest(bld_base, &instr->dest, result);
992  }
993 
visit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr)994 static void visit_load_const(struct lp_build_nir_context *bld_base,
995                              const nir_load_const_instr *instr)
996 {
997    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
998    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
999    for (unsigned i = 0; i < instr->def.num_components; i++)
1000       result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
1001    assign_ssa_dest(bld_base, &instr->def, result);
1002 }
1003 
1004 static void
get_deref_offset(struct lp_build_nir_context * bld_base,nir_deref_instr * instr,bool vs_in,unsigned * vertex_index_out,LLVMValueRef * vertex_index_ref,unsigned * const_out,LLVMValueRef * indir_out)1005 get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
1006                  bool vs_in, unsigned *vertex_index_out,
1007                  LLVMValueRef *vertex_index_ref,
1008                  unsigned *const_out, LLVMValueRef *indir_out)
1009 {
1010    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1011    nir_variable *var = nir_deref_instr_get_variable(instr);
1012    nir_deref_path path;
1013    unsigned idx_lvl = 1;
1014 
1015    nir_deref_path_init(&path, instr, NULL);
1016 
1017    if (vertex_index_out != NULL || vertex_index_ref != NULL) {
1018       if (vertex_index_ref) {
1019          *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
1020          if (vertex_index_out)
1021             *vertex_index_out = 0;
1022       } else {
1023          *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
1024       }
1025       ++idx_lvl;
1026    }
1027 
1028    uint32_t const_offset = 0;
1029    LLVMValueRef offset = NULL;
1030 
1031    if (var->data.compact && nir_src_is_const(instr->arr.index)) {
1032       assert(instr->deref_type == nir_deref_type_array);
1033       const_offset = nir_src_as_uint(instr->arr.index);
1034       goto out;
1035    }
1036 
1037    for (; path.path[idx_lvl]; ++idx_lvl) {
1038       const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
1039       if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
1040          unsigned index = path.path[idx_lvl]->strct.index;
1041 
1042          for (unsigned i = 0; i < index; i++) {
1043             const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
1044             const_offset += glsl_count_attribute_slots(ft, vs_in);
1045          }
1046       } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
1047          unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
1048          if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
1049            const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
1050          } else {
1051            LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
1052            idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
1053            LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
1054                                                idx_src);
1055            if (offset)
1056              offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
1057            else
1058              offset = array_off;
1059          }
1060       } else
1061          unreachable("Uhandled deref type in get_deref_instr_offset");
1062    }
1063 
1064 out:
1065    nir_deref_path_finish(&path);
1066 
1067    if (const_offset && offset)
1068       offset = LLVMBuildAdd(builder, offset,
1069                             lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
1070                             "");
1071    *const_out = const_offset;
1072    *indir_out = offset;
1073 }
1074 
1075 static void
visit_load_input(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1076 visit_load_input(struct lp_build_nir_context *bld_base,
1077                  nir_intrinsic_instr *instr,
1078                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1079 {
1080    nir_variable var = {0};
1081    var.data.location = nir_intrinsic_io_semantics(instr).location;
1082    var.data.driver_location = nir_intrinsic_base(instr);
1083    var.data.location_frac = nir_intrinsic_component(instr);
1084 
1085    unsigned nc = nir_dest_num_components(instr->dest);
1086    unsigned bit_size = nir_dest_bit_size(instr->dest);
1087 
1088    nir_src offset = *nir_get_io_offset_src(instr);
1089    bool indirect = !nir_src_is_const(offset);
1090    if (!indirect)
1091       assert(nir_src_as_uint(offset) == 0);
1092    LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1093 
1094    bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
1095 }
1096 
1097 static void
visit_store_output(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1098 visit_store_output(struct lp_build_nir_context *bld_base,
1099                    nir_intrinsic_instr *instr)
1100 {
1101    nir_variable var = {0};
1102    var.data.location = nir_intrinsic_io_semantics(instr).location;
1103    var.data.driver_location = nir_intrinsic_base(instr);
1104    var.data.location_frac = nir_intrinsic_component(instr);
1105 
1106    unsigned mask = nir_intrinsic_write_mask(instr);
1107 
1108    unsigned bit_size = nir_src_bit_size(instr->src[0]);
1109    LLVMValueRef src = get_src(bld_base, instr->src[0]);
1110 
1111    nir_src offset = *nir_get_io_offset_src(instr);
1112    bool indirect = !nir_src_is_const(offset);
1113    if (!indirect)
1114       assert(nir_src_as_uint(offset) == 0);
1115    LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1116 
1117    if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
1118       src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
1119                                   src, 0, "");
1120    }
1121 
1122    bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
1123                        bit_size, &var, mask, NULL, 0, indir_index, src);
1124 }
1125 
/*
 * Emit a variable load through the load_var callback.
 *
 * The deref in src[0] is resolved to its variable.  When a variable is
 * found, get_deref_offset() supplies the constant and indirect offsets and,
 * depending on shader stage and variable mode, a vertex index:
 *   - geometry shader inputs request a constant vertex index;
 *   - tess-control inputs, non-patch tess-control outputs and non-patch
 *     tess-eval inputs request an LLVM-value vertex index.
 *
 * When no variable is found, the deref's own mode is used and all
 * offsets/indices are passed as 0 / NULL.
 */
static void visit_load_var(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(util_bitcount(deref->modes) == 1);
   nir_variable_mode mode = deref->modes;
   /* These are only written by get_deref_offset(), which is skipped when
    * var is NULL; give them defined values so load_var never receives
    * indeterminate arguments.
    */
   unsigned const_index = 0;
   LLVMValueRef indir_index = NULL;
   LLVMValueRef indir_vertex_index = NULL;
   unsigned vertex_index = 0;
   unsigned nc = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);
   if (var) {
      bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
         var->data.mode == nir_var_shader_in;
      bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
         var->data.mode == nir_var_shader_in;
      bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_in;
      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_out && !var->data.patch;
      bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
         var->data.mode == nir_var_shader_in && !var->data.patch;

      /* Prefer the variable's own mode over the deref's. */
      mode = var->data.mode;

      get_deref_offset(bld_base, deref, vs_in, gs_in ? &vertex_index : NULL, (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
                       &const_index, &indir_index);
   }
   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index, indir_vertex_index, const_index, indir_index, result);
}
1159 
1160 static void
visit_store_var(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1161 visit_store_var(struct lp_build_nir_context *bld_base,
1162                 nir_intrinsic_instr *instr)
1163 {
1164    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1165    nir_variable *var = nir_deref_instr_get_variable(deref);
1166    assert(util_bitcount(deref->modes) == 1);
1167    nir_variable_mode mode = deref->modes;
1168    int writemask = instr->const_index[0];
1169    unsigned bit_size = nir_src_bit_size(instr->src[1]);
1170    LLVMValueRef src = get_src(bld_base, instr->src[1]);
1171    unsigned const_index = 0;
1172    LLVMValueRef indir_index, indir_vertex_index = NULL;
1173    if (var) {
1174       bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1175          var->data.mode == nir_var_shader_out && !var->data.patch;
1176       get_deref_offset(bld_base, deref, false, NULL, tcs_out ? &indir_vertex_index : NULL,
1177                        &const_index, &indir_index);
1178    }
1179    bld_base->store_var(bld_base, mode, instr->num_components, bit_size, var, writemask, indir_vertex_index, const_index, indir_index, src);
1180 }
1181 
/*
 * Emit a UBO load through the load_ubo callback.
 *
 * src[0] is the buffer index; only element 0 of its vector value is used.
 * src[1] is the offset, which is additionally classified as dynamically
 * uniform or not so the callback can pick its code path.
 */
static void visit_load_ubo(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef block_vec = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   const bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[1]);

   LLVMValueRef first_lane = lp_build_const_int32(gallivm, 0);
   LLVMValueRef block = LLVMBuildExtractElement(builder, block_vec, first_lane, "");

   bld_base->load_ubo(bld_base,
                      nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      offset_is_uniform, block, offset, result);
}
1196 
/*
 * Emit a push-constant load.  It is implemented as a load_ubo from buffer
 * index 0 with the offset taken from src[0].
 *
 * The result array is declared with NIR_MAX_VEC_COMPONENTS entries, like
 * the other load visitors, because load_ubo is asked to produce
 * nir_dest_num_components() values.
 */
static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
                                     nir_intrinsic_instr *instr,
                                     LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef idx = lp_build_const_int32(gallivm, 0);
   bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]);

   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                      offset_is_uniform, idx, offset, result);
}
1209 
1210 
/*
 * Emit an SSBO load through the load_mem callback: src[0] is the buffer
 * index and src[1] the offset; component count and bit size come from the
 * destination.
 */
static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef buffer = get_src(bld_base, instr->src[0]);
   LLVMValueRef buffer_offset = get_src(bld_base, instr->src[1]);
   const unsigned nc = nir_dest_num_components(instr->dest);
   const unsigned bit_size = nir_dest_bit_size(instr->dest);

   bld_base->load_mem(bld_base, nc, bit_size, buffer, buffer_offset, result);
}
1220 
/*
 * Emit an SSBO store through the store_mem callback: src[0] is the value,
 * src[1] the buffer index and src[2] the offset.  The write mask is read
 * from const_index[0]; component count and bit size come from the value.
 */
static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
{
   LLVMValueRef value = get_src(bld_base, instr->src[0]);
   LLVMValueRef buffer = get_src(bld_base, instr->src[1]);
   LLVMValueRef buffer_offset = get_src(bld_base, instr->src[2]);
   const int writemask = instr->const_index[0];
   const int num_comps = nir_src_num_components(instr->src[0]);
   const int bit_size = nir_src_bit_size(instr->src[0]);

   bld_base->store_mem(bld_base, writemask, num_comps, bit_size,
                       buffer, buffer_offset, value);
}
1232 
/*
 * Query the size of the SSBO selected by src[0] through the get_ssbo_size
 * callback; the answer is written to result[0].
 */
static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef buffer = get_src(bld_base, instr->src[0]);
   LLVMValueRef size = bld_base->get_ssbo_size(bld_base, buffer);

   result[0] = size;
}
1240 
/*
 * Emit an SSBO atomic through the atomic_mem callback: src[0] is the
 * buffer index, src[1] the offset and src[2] the operand.  Only
 * compare-and-swap reads a second operand from src[3]; all other
 * intrinsics pass NULL for it.  The outcome is written to result[0].
 */
static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   const bool is_comp_swap =
      instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap;
   LLVMValueRef buffer = get_src(bld_base, instr->src[0]);
   LLVMValueRef buffer_offset = get_src(bld_base, instr->src[1]);
   LLVMValueRef operand = get_src(bld_base, instr->src[2]);
   LLVMValueRef operand2 =
      is_comp_swap ? get_src(bld_base, instr->src[3]) : NULL;

   bld_base->atomic_mem(bld_base, instr->intrinsic, buffer, buffer_offset,
                        operand, operand2, &result[0]);
}
1255 
/*
 * Emit an image load through the image_op callback.
 *
 * src[0] is the image deref, src[1] the coordinate value (its first four
 * components are extracted) and, for multisampled sampler dimensions,
 * src[2] the sample index.  The image slot is the variable's binding plus
 * the constant deref offset; when the deref has a dynamic offset, that
 * offset is passed separately and the constant part is not added to the
 * binding here.
 */
static void visit_load_image(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   const struct glsl_type *type = glsl_without_array(var->type);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef coords[5];
   LLVMValueRef indir_index;
   unsigned const_index;
   struct lp_img_params params;

   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.img_op = LP_IMG_LOAD;
   params.outdata = result;
   params.coords = coords;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
                                        glsl_sampler_type_is_array(type));

   for (unsigned c = 0; c < 4; c++)
      coords[c] = LLVMBuildExtractValue(builder, coord_val, c, "");

   /* For 1D arrays, copy coordinate 1 into slot 2 as well. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];

   switch (glsl_get_sampler_dim(type)) {
   case GLSL_SAMPLER_DIM_MS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]),
                                  nir_type_uint, 32);
      break;
   default:
      break;
   }

   if (indir_index)
      params.image_index = var->data.binding;
   else
      params.image_index = var->data.binding + const_index;
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1289 
/*
 * Emit an image store through the image_op callback.
 *
 * src[0] is the image deref, src[1] the coordinate value, src[3] the data
 * to store and, for GLSL_SAMPLER_DIM_MS images, src[2] the sample index.
 * The first four components of both the coordinates and the data are
 * extracted; each data component is bitcast to the base vector type.
 * The image slot is the variable's binding plus the constant deref offset;
 * when the deref has a dynamic offset, that offset is passed separately
 * and the constant part is not added to the binding here.
 */
static void visit_store_image(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* For 1D arrays, copy coordinate 1 into slot 2 as well.  This only needs
    * to happen once: nothing below modifies coords.
    */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;

   for (unsigned i = 0; i < 4; i++) {
      params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
      params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
   }
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   params.img_op = LP_IMG_STORE;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1329 
/**
 * Lower an image_deref atomic intrinsic to a call of the image_op hook.
 *
 * The read-modify-write opcode is picked from instr->intrinsic.  For
 * comp_swap no opcode is selected (params stays zeroed there), the compare
 * value from src[4] goes into indata2[0] and the request is tagged
 * LP_IMG_ATOMIC_CAS; every other intrinsic is tagged LP_IMG_ATOMIC.
 *
 * \param bld_base  translation context providing the image_op hook
 * \param instr     the atomic image intrinsic being translated
 * \param result    handed to the hook as params.outdata
 */
static void visit_atomic_image(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   const struct glsl_type *type = glsl_without_array(var->type);
   const bool is_cas =
      instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap;
   unsigned static_idx;
   LLVMValueRef dynamic_idx;
   LLVMValueRef coords[5];
   struct lp_img_params params;

   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &static_idx, &dynamic_idx);

   memset(&params, 0, sizeof(params));

   /* Map the NIR atomic onto the LLVM atomicrmw opcode. */
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_atomic_add:      params.op = LLVMAtomicRMWBinOpAdd;  break;
   case nir_intrinsic_image_deref_atomic_exchange: params.op = LLVMAtomicRMWBinOpXchg; break;
   case nir_intrinsic_image_deref_atomic_and:      params.op = LLVMAtomicRMWBinOpAnd;  break;
   case nir_intrinsic_image_deref_atomic_or:       params.op = LLVMAtomicRMWBinOpOr;   break;
   case nir_intrinsic_image_deref_atomic_xor:      params.op = LLVMAtomicRMWBinOpXor;  break;
   case nir_intrinsic_image_deref_atomic_umin:     params.op = LLVMAtomicRMWBinOpUMin; break;
   case nir_intrinsic_image_deref_atomic_umax:     params.op = LLVMAtomicRMWBinOpUMax; break;
   case nir_intrinsic_image_deref_atomic_imin:     params.op = LLVMAtomicRMWBinOpMin;  break;
   case nir_intrinsic_image_deref_atomic_imax:     params.op = LLVMAtomicRMWBinOpMax;  break;
   default:
      /* comp_swap (and anything else) leaves params.op at zero. */
      break;
   }

   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
                                        glsl_sampler_type_is_array(type));

   for (unsigned chan = 0; chan < 4; chan++)
      coords[chan] = LLVMBuildExtractValue(builder, coord_val, chan, "");

   /* For 1D arrays the second coordinate is copied into slot 2. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;

   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);

   params.indata[0] = in_val;
   if (is_cas)
      params.indata2[0] = get_src(bld_base, instr->src[4]);

   params.outdata = result;
   params.img_op = is_cas ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;

   /* With a dynamic index only the binding is used as the base index. */
   params.image_index = var->data.binding + (dynamic_idx ? 0 : static_idx);
   params.image_index_offset = dynamic_idx;

   bld_base->image_op(bld_base, &params);
}
1404 
1405 
/**
 * Lower image_deref_size: fill a size-query request for the image variable
 * behind src[0] and pass it to the image_size hook, which writes through
 * \p result.
 */
static void visit_image_size(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   const struct glsl_type *img_type = glsl_without_array(var->type);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned static_idx;
   LLVMValueRef dynamic_idx;

   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &static_idx, &dynamic_idx);

   /* The constant index only contributes when there is no dynamic one. */
   unsigned unit = var->data.binding;
   if (!dynamic_idx)
      unit += static_idx;

   params.texture_unit = unit;
   params.texture_unit_offset = dynamic_idx;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(img_type),
                                        glsl_sampler_type_is_array(img_type));
   params.sizes_out = result;

   bld_base->image_size(bld_base, &params);
}
1425 
/**
 * Lower image_deref_samples.  Builds the same request as the size query
 * but with samples_only set, then calls the image_size hook, which writes
 * through \p result.
 */
static void visit_image_samples(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   const struct glsl_type *img_type = glsl_without_array(var->type);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned static_idx;
   LLVMValueRef dynamic_idx;

   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &static_idx, &dynamic_idx);

   /* The constant index only contributes when there is no dynamic one. */
   unsigned unit = var->data.binding;
   if (!dynamic_idx)
      unit += static_idx;

   params.texture_unit = unit;
   params.texture_unit_offset = dynamic_idx;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(img_type),
                                        glsl_sampler_type_is_array(img_type));
   params.sizes_out = result;
   params.samples_only = true;

   bld_base->image_size(bld_base, &params);
}
1447 
/**
 * Lower load_shared: the offset comes from src[0]; component count and bit
 * size come from the destination.  The load_mem hook is called with NULL
 * as its fourth argument and fills \p result.
 */
static void visit_shared_load(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   unsigned num_comps = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);

   bld_base->load_mem(bld_base, num_comps, bit_size, NULL, offset, result);
}
1456 
/**
 * Lower store_shared: src[0] is the value, src[1] the offset.  The
 * store_mem hook is called with NULL as its fifth argument.
 */
static void visit_shared_store(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);

   /* NOTE(review): the write mask is read from a hard-coded const_index
    * slot; confirm slot 1 is this intrinsic's write-mask index. */
   int writemask = instr->const_index[1];

   bld_base->store_mem(bld_base, writemask,
                       nir_src_num_components(instr->src[0]),
                       nir_src_bit_size(instr->src[0]),
                       NULL, offset, val);
}
1467 
/**
 * Lower a shared_atomic_* intrinsic through the atomic_mem hook.
 *
 * src[0] is the offset and src[1] the operand; src[2] is fetched as the
 * second operand only for comp_swap, otherwise NULL is passed.  The hook
 * receives NULL as its third argument and writes to result[0].
 */
static void visit_shared_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef val = get_src(bld_base, instr->src[1]);
   LLVMValueRef val2 =
      instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap
         ? get_src(bld_base, instr->src[2])
         : NULL;

   bld_base->atomic_mem(bld_base, instr->intrinsic, NULL, offset, val, val2,
                        result);
}
1481 
visit_barrier(struct lp_build_nir_context * bld_base)1482 static void visit_barrier(struct lp_build_nir_context *bld_base)
1483 {
1484    bld_base->barrier(bld_base);
1485 }
1486 
/**
 * Lower discard / discard_if through the discard hook.
 *
 * A plain discard passes a NULL condition; discard_if passes src[0] cast
 * to a 32-bit integer type.
 */
static void visit_discard(struct lp_build_nir_context *bld_base,
                          nir_intrinsic_instr *instr)
{
   if (instr->intrinsic != nir_intrinsic_discard_if) {
      bld_base->discard(bld_base, NULL);
      return;
   }

   LLVMValueRef cond = get_src(bld_base, instr->src[0]);
   bld_base->discard(bld_base, cast_type(bld_base, cond, nir_type_int, 32));
}
1497 
/**
 * Lower load_kernel_input through the load_kernel_arg hook.
 *
 * Besides the destination's component count and bit size, the hook gets
 * the bit size of the offset source and whether that source is
 * dynamically uniform.
 */
static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
                                    nir_intrinsic_instr *instr,
                                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   bool uniform_offset = nir_src_is_dynamically_uniform(instr->src[0]);
   unsigned num_comps = nir_dest_num_components(instr->dest);
   unsigned dest_bits = nir_dest_bit_size(instr->dest);
   unsigned offset_bits = nir_src_bit_size(instr->src[0]);

   bld_base->load_kernel_arg(bld_base, num_comps, dest_bits, offset_bits,
                             uniform_offset, offset, result);
}
1508 
/**
 * Lower a global load through the load_global hook: src[0] is the
 * address, whose bit size is passed along with the destination's
 * component count and bit size.
 */
static void visit_load_global(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
   unsigned num_comps = nir_dest_num_components(instr->dest);
   unsigned dest_bits = nir_dest_bit_size(instr->dest);
   unsigned addr_bits = nir_src_bit_size(instr->src[0]);

   bld_base->load_global(bld_base, num_comps, dest_bits, addr_bits,
                         addr, result);
}
1517 
/**
 * Lower store_global through the store_global hook: src[0] is the value
 * and src[1] the address.
 */
static void visit_store_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef addr = get_src(bld_base, instr->src[1]);

   /* NOTE(review): the write mask is read from a hard-coded const_index
    * slot; confirm slot 0 is this intrinsic's write-mask index. */
   int writemask = instr->const_index[0];

   bld_base->store_global(bld_base, writemask,
                          nir_src_num_components(instr->src[0]),
                          nir_src_bit_size(instr->src[0]),
                          nir_src_bit_size(instr->src[1]),
                          addr, val);
}
1529 
/**
 * Lower a global_atomic_* intrinsic through the atomic_global hook.
 *
 * src[0] is the address (its bit size is passed along) and src[1] the
 * operand; src[2] is fetched as the second operand only for comp_swap,
 * otherwise NULL is passed.  The hook writes to result[0].
 */
static void visit_global_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
   LLVMValueRef val = get_src(bld_base, instr->src[1]);
   int addr_bits = nir_src_bit_size(instr->src[0]);
   LLVMValueRef val2 =
      instr->intrinsic == nir_intrinsic_global_atomic_comp_swap
         ? get_src(bld_base, instr->src[2])
         : NULL;

   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bits, addr,
                           val, val2, result);
}
1543 
/**
 * Lower interp_deref_at_{offset,centroid,sample} through the interp_at
 * hook.
 *
 * - at_offset:   the first two components of src[1] are cast to 32-bit
 *                float and passed in offsets[0..1].
 * - at_sample:   src[1] is cast to a 32-bit integer, passed in offsets[0],
 *                and the sample flag is set.
 * - at_centroid: only the centroid flag is set; offsets stay NULL.
 */
static void visit_interp(struct lp_build_nir_context *bld_base,
                         nir_intrinsic_instr *instr,
                         LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   unsigned num_components = nir_dest_num_components(instr->dest);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef offsets[2] = { NULL, NULL };
   unsigned static_idx;
   LLVMValueRef dynamic_idx;
   bool at_centroid = false;
   bool at_sample = false;

   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &static_idx, &dynamic_idx);

   switch (instr->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      at_centroid = true;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      for (unsigned axis = 0; axis < 2; axis++) {
         LLVMValueRef comp =
            LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]),
                                  axis, "");
         offsets[axis] = cast_type(bld_base, comp, nir_type_float, 32);
      }
      break;
   case nir_intrinsic_interp_deref_at_sample:
      offsets[0] = cast_type(bld_base, get_src(bld_base, instr->src[1]),
                             nir_type_int, 32);
      at_sample = true;
      break;
   default:
      break;
   }

   bld_base->interp_at(bld_base, num_components, var, at_centroid, at_sample,
                       static_idx, dynamic_idx, offsets, result);
}
1572 
/**
 * Lower load_scratch through the load_scratch hook: src[0] is the offset;
 * component count and bit size come from the destination.
 */
static void visit_load_scratch(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   unsigned num_comps = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);

   bld_base->load_scratch(bld_base, num_comps, bit_size, offset, result);
}
1582 
/**
 * Lower store_scratch through the store_scratch hook: src[0] is the value
 * and src[1] the offset.
 */
static void visit_store_scratch(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);

   /* NOTE(review): the write mask is read from a hard-coded const_index
    * slot; confirm slot 2 is this intrinsic's write-mask index. */
   int writemask = instr->const_index[2];

   bld_base->store_scratch(bld_base, writemask,
                           nir_src_num_components(instr->src[0]),
                           nir_src_bit_size(instr->src[0]),
                           offset, val);
}
1593 
1594 
/**
 * Dispatch one NIR intrinsic to the matching visit_* helper or to a hook
 * on \p bld_base.
 *
 * Helpers that produce a value write it into the local result array, which
 * starts out zeroed.  After dispatch, if result[0] is non-NULL the array is
 * stored to the instruction's destination with assign_dest().
 *
 * An intrinsic with no case here is printed to stderr and trips assert(0).
 */
static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
   switch (instr->intrinsic) {
   /* Shader inputs/outputs and variable access. */
   case nir_intrinsic_load_input:
      visit_load_input(bld_base, instr, result);
      break;
   case nir_intrinsic_store_output:
      visit_store_output(bld_base, instr);
      break;
   case nir_intrinsic_load_deref:
      visit_load_var(bld_base, instr, result);
      break;
   case nir_intrinsic_store_deref:
      visit_store_var(bld_base, instr);
      break;
   /* UBO, push-constant and SSBO access. */
   case nir_intrinsic_load_ubo:
      visit_load_ubo(bld_base, instr, result);
      break;
   case nir_intrinsic_load_push_constant:
      visit_load_push_constant(bld_base, instr, result);
      break;
   case nir_intrinsic_load_ssbo:
      visit_load_ssbo(bld_base, instr, result);
      break;
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(bld_base, instr);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(bld_base, instr, result);
      break;
   /* All of these loads are handled by the sysval_intrin hook. */
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_work_group_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
      bld_base->sysval_intrin(bld_base, instr, result);
      break;
   case nir_intrinsic_load_helper_invocation:
      bld_base->helper_invocation(bld_base, &result[0]);
      break;
   case nir_intrinsic_discard_if:
   case nir_intrinsic_discard:
      visit_discard(bld_base, instr);
      break;
   case nir_intrinsic_emit_vertex:
      bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_end_primitive:
      bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
      break;
   /* SSBO atomics. */
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_atomic(bld_base, instr, result);
      break;
   /* Image load/store, atomics and queries. */
   case nir_intrinsic_image_deref_load:
      visit_load_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_store:
      visit_store_image(bld_base, instr);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      visit_atomic_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_size:
      visit_image_size(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_samples:
      visit_image_samples(bld_base, instr, result);
      break;
   /* Shared-memory load/store and atomics. */
   case nir_intrinsic_load_shared:
      visit_shared_load(bld_base, instr, result);
      break;
   case nir_intrinsic_store_shared:
      visit_shared_store(bld_base, instr);
      break;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      visit_shared_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_control_barrier:
      visit_barrier(bld_base);
      break;
   /* The memory barrier intrinsics are accepted but nothing is emitted. */
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_tcs_patch:
      break;
   /* Kernel arguments and global memory. */
   case nir_intrinsic_load_kernel_input:
      visit_load_kernel_input(bld_base, instr, result);
     break;
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      visit_load_global(bld_base, instr, result);
      break;
   case nir_intrinsic_store_global:
      visit_store_global(bld_base, instr);
      break;
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
      visit_global_atomic(bld_base, instr, result);
      break;
   /* Votes: src[0] is cast to a 32-bit integer before reaching the hook. */
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq:
      bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
      break;
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
      visit_interp(bld_base, instr, result);
      break;
   /* Scratch memory. */
   case nir_intrinsic_load_scratch:
      visit_load_scratch(bld_base, instr, result);
      break;
   case nir_intrinsic_store_scratch:
      visit_store_scratch(bld_base, instr);
      break;
   default:
      /* Unknown intrinsic: print it, then assert. */
      fprintf(stderr, "Unsupported intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      assert(0);
      break;
   }
   /* result[0] stays NULL unless a handler stored a value into it. */
   if (result[0]) {
      assign_dest(bld_base, &instr->dest, result);
   }
}
1775 
/**
 * Lower the texture query ops routed here by visit_tex (txs, query_levels,
 * texture_samples) through the tex_size hook.
 *
 * An lod source, if present, is cast to a 32-bit integer and used as the
 * explicit lod; for query_levels it is replaced by uint_bld.zero and the
 * destination is assigned starting at sizes_out[3] rather than
 * sizes_out[0].
 */
static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct lp_sampler_size_query_params params = { 0 };
   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
   const bool is_query_levels = instr->op == nir_texop_query_levels;

   /* Pick up the sources this query cares about; others are ignored. */
   for (unsigned s = 0; s < instr->num_srcs; s++) {
      if (instr->src[s].src_type == nir_tex_src_lod) {
         params.explicit_lod = cast_type(bld_base,
                                         get_src(bld_base, instr->src[s].src),
                                         nir_type_int, 32);
      } else if (instr->src[s].src_type == nir_tex_src_texture_offset) {
         params.texture_unit_offset = get_src(bld_base, instr->src[s].src);
      }
   }

   if (is_query_levels)
      params.explicit_lod = bld_base->uint_bld.zero;

   params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
   params.texture_unit = instr->texture_index;
   params.is_sviewinfo = TRUE;
   params.sizes_out = sizes_out;
   params.samples_only = (instr->op == nir_texop_texture_samples);

   bld_base->tex_size(bld_base, &params);

   assign_dest(bld_base, &instr->dest, &sizes_out[is_query_levels ? 3 : 0]);
}
1808 
/**
 * Choose the lod property for a texture op from its lod source.
 *
 * \return LP_SAMPLER_LOD_SCALAR when \p lod_src is dynamically uniform;
 *         otherwise LP_SAMPLER_LOD_PER_QUAD in a fragment shader unless
 *         GALLIVM_PERF_NO_QUAD_LOD is set in gallivm_perf; and
 *         LP_SAMPLER_LOD_PER_ELEMENT in every remaining case.
 */
static enum lp_sampler_lod_property lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
                                                              nir_src lod_src)
{
   if (nir_src_is_dynamically_uniform(lod_src))
      return LP_SAMPLER_LOD_SCALAR;

   if (bld_base->shader->info.stage != MESA_SHADER_FRAGMENT)
      return LP_SAMPLER_LOD_PER_ELEMENT;

   return (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) ? LP_SAMPLER_LOD_PER_ELEMENT
                                                    : LP_SAMPLER_LOD_PER_QUAD;
}
1826 
/**
 * Translate a NIR texture instruction into a call to the tex hook.
 *
 * Query ops (txs, query_levels, texture_samples) are forwarded to
 * visit_txs().  For every other op the instruction's sources are gathered
 * into an lp_sampler_params, sample_key is assembled from the op and from
 * the sources that are present, bld_base->tex() is invoked and the
 * resulting texel is stored with assign_dest().
 *
 * All five coordinate slots are set to undef before the sources are
 * walked, so the comparator written to coords[4] is kept regardless of
 * where it appears relative to the coordinate source in instr->src[], and
 * no slot is ever read uninitialised.
 */
static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef explicit_lod = NULL, projector = NULL, ms_index = NULL;
   struct lp_sampler_params params;
   struct lp_derivatives derivs;
   unsigned sample_key = 0;
   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
   unsigned lod_src = 0;
   LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);

   memset(&params, 0, sizeof(params));
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
      visit_txs(bld_base, instr);
      return;
   }

   /* Start with every coordinate slot undefined; the source cases below
    * only overwrite the slots they actually provide. */
   for (unsigned chan = 0; chan < 5; chan++)
      coords[chan] = coord_undef;

   /* Encode the operation type into the sample key. */
   if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
   else if (instr->op == nir_texop_tg4) {
      sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
   } else if (instr->op == nir_texop_lod)
      sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
         if (instr->coord_components == 1)
            coords[0] = coord;
         else {
            for (unsigned chan = 0; chan < instr->coord_components; ++chan)
               coords[chan] = LLVMBuildExtractValue(builder, coord,
                                                    chan, "");
         }
         /* Remaining slots keep the undef they were initialised with. */
         break;
      }
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_projector:
         /* Keep the reciprocal so the coordinates can be multiplied by it. */
         projector = lp_build_rcp(&bld_base->base, cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32));
         break;
      case nir_tex_src_comparator:
         sample_key |= LP_SAMPLER_SHADOW;
         coords[4] = get_src(bld_base, instr->src[i].src);
         coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
         break;
      case nir_tex_src_bias:
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_lod:
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         /* txf takes an integer lod, every other op a float one. */
         if (instr->op == nir_texop_txf)
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         else
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_ddx: {
         /* One derivative per coordinate, minus one for array textures. */
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddx[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_ddy: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddy[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_offset: {
         int offset_cnt = instr->coord_components;
         if (instr->is_array)
            offset_cnt--;
         LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
         sample_key |= LP_SAMPLER_OFFSETS;
         if (offset_cnt == 1)
            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
         else {
            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
               offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
                                                     chan, "");
               offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
            }
         }
         break;
      }
      case nir_tex_src_ms_index:
         sample_key |= LP_SAMPLER_FETCH_MS;
         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;

      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      case nir_tex_src_sampler_offset:
         break;
      default:
         assert(0);
         break;
      }
   }
   if (!sampler_deref_instr)
      sampler_deref_instr = texture_deref_instr;

   if (explicit_lod)
      lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);

   /* Sampling ops take float coordinates, fetch ops integer ones. */
   if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
       instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
   else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);

   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
      /* move layer coord for 1d arrays. */
      coords[2] = coords[1];
      coords[1] = coord_undef;
   }

   if (projector) {
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = lp_build_mul(&bld_base->base, coords[chan], projector);
      if (sample_key & LP_SAMPLER_SHADOW)
         coords[4] = lp_build_mul(&bld_base->base, coords[4], projector);
   }

   /* Without a deref or handle source, use the instruction's own indices. */
   uint32_t samp_base_index = 0, tex_base_index = 0;
   if (!sampler_deref_instr) {
      int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
      if (samp_src_index == -1) {
         samp_base_index = instr->sampler_index;
      }
   }
   if (!texture_deref_instr) {
      int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
      if (tex_src_index == -1) {
         tex_base_index = instr->texture_index;
      }
   }

   if (instr->op == nir_texop_txd) {
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      params.derivs = &derivs;
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         else
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
      } else
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }

   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   params.sample_key = sample_key;
   params.offsets = offsets;
   params.texture_index = tex_base_index;
   params.texture_index_offset = texture_unit_offset;
   params.sampler_index = samp_base_index;
   params.coords = coords;
   params.texel = texel;
   params.lod = explicit_lod;
   params.ms_index = ms_index;
   bld_base->tex(bld_base, &params);
   assign_dest(bld_base, &instr->dest, texel);
}
2029 
visit_ssa_undef(struct lp_build_nir_context * bld_base,const nir_ssa_undef_instr * instr)2030 static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
2031                             const nir_ssa_undef_instr *instr)
2032 {
2033    unsigned num_components = instr->def.num_components;
2034    LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
2035    struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
2036    for (unsigned i = 0; i < num_components; i++)
2037       undef[i] = LLVMGetUndef(undef_bld->vec_type);
2038    assign_ssa_dest(bld_base, &instr->def, undef);
2039 }
2040 
visit_jump(struct lp_build_nir_context * bld_base,const nir_jump_instr * instr)2041 static void visit_jump(struct lp_build_nir_context *bld_base,
2042                        const nir_jump_instr *instr)
2043 {
2044    switch (instr->type) {
2045    case nir_jump_break:
2046       bld_base->break_stmt(bld_base);
2047       break;
2048    case nir_jump_continue:
2049       bld_base->continue_stmt(bld_base);
2050       break;
2051    default:
2052       unreachable("Unknown jump instr\n");
2053    }
2054 }
2055 
/*
 * Handle a deref instruction.  Derefs whose mode is neither shared nor
 * global memory are ignored.  For the remaining ones only variable derefs
 * are supported: the value stored for the variable in bld_base->vars is
 * bound to the deref's SSA destination.
 */
static void visit_deref(struct lp_build_nir_context *bld_base,
                        nir_deref_instr *instr)
{
   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
                                        nir_var_mem_global))
      return;

   if (instr->deref_type != nir_deref_type_var)
      unreachable("Unhandled deref_instr deref type");

   struct hash_entry *var_entry =
      _mesa_hash_table_search(bld_base->vars, instr->var);

   assign_ssa(bld_base, instr->dest.ssa.index, var_entry->data);
}
2075 
/*
 * Walk every instruction of a basic block and hand it to the visitor for
 * its instruction type.  An instruction type without a visitor is printed
 * to stderr and the process is aborted.
 */
static void visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
{
   nir_foreach_instr(cur, block) {
      switch (cur->type) {
      /* Value-producing instructions. */
      case nir_instr_type_load_const:
         visit_load_const(bld_base, nir_instr_as_load_const(cur));
         break;
      case nir_instr_type_ssa_undef:
         visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(cur));
         break;
      case nir_instr_type_alu:
         visit_alu(bld_base, nir_instr_as_alu(cur));
         break;
      case nir_instr_type_intrinsic:
         visit_intrinsic(bld_base, nir_instr_as_intrinsic(cur));
         break;
      case nir_instr_type_tex:
         visit_tex(bld_base, nir_instr_as_tex(cur));
         break;
      case nir_instr_type_deref:
         visit_deref(bld_base, nir_instr_as_deref(cur));
         break;

      /* Control flow. */
      case nir_instr_type_jump:
         visit_jump(bld_base, nir_instr_as_jump(cur));
         break;

      /* Phis are not expected here; presumably they are gone once the
       * shader has been converted out of SSA form. */
      case nir_instr_type_phi:
         assert(0);
         break;

      default:
         fprintf(stderr, "Unknown NIR instr type: ");
         nir_print_instr(cur, stderr);
         fprintf(stderr, "\n");
         abort();
      }
   }
}
2113 
/*
 * Emit an if statement: open the conditional on the NIR condition, visit the
 * then-list, visit the else-list only when it is non-empty, and close the
 * conditional.
 */
static void visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
{
   LLVMValueRef condition = get_src(bld_base, if_stmt->condition);
   bld_base->if_cond(bld_base, condition);

   /* then branch */
   visit_cf_list(bld_base, &if_stmt->then_list);

   /* else branch, skipped entirely when there is nothing in it */
   const bool has_else = !exec_list_is_empty(&if_stmt->else_list);
   if (has_else) {
      bld_base->else_stmt(bld_base);
      visit_cf_list(bld_base, &if_stmt->else_list);
   }

   bld_base->endif_stmt(bld_base);
}
2127 
/*
 * Emit a loop: the body's control-flow list is visited between the backend's
 * begin-loop and end-loop callbacks.
 */
static void visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
{
   struct exec_list *body = &loop->body;

   bld_base->bgnloop(bld_base);
   visit_cf_list(bld_base, body);
   bld_base->endloop(bld_base);
}
2134 
visit_cf_list(struct lp_build_nir_context * bld_base,struct exec_list * list)2135 static void visit_cf_list(struct lp_build_nir_context *bld_base,
2136                           struct exec_list *list)
2137 {
2138    foreach_list_typed(nir_cf_node, node, node, list)
2139    {
2140       switch (node->type) {
2141       case nir_cf_node_block:
2142          visit_block(bld_base, nir_cf_node_as_block(node));
2143          break;
2144 
2145       case nir_cf_node_if:
2146          visit_if(bld_base, nir_cf_node_as_if(node));
2147          break;
2148 
2149       case nir_cf_node_loop:
2150          visit_loop(bld_base, nir_cf_node_as_loop(node));
2151          break;
2152 
2153       default:
2154          assert(0);
2155       }
2156    }
2157 }
2158 
2159 static void
handle_shader_output_decl(struct lp_build_nir_context * bld_base,struct nir_shader * nir,struct nir_variable * variable)2160 handle_shader_output_decl(struct lp_build_nir_context *bld_base,
2161                           struct nir_shader *nir,
2162                           struct nir_variable *variable)
2163 {
2164    bld_base->emit_var_decl(bld_base, variable);
2165 }
2166 
2167 /* vector registers are stored as arrays in LLVM side,
2168    so we can use GEP on them, as to do exec mask stores
2169    we need to operate on a single components.
2170    arrays are:
2171    0.x, 1.x, 2.x, 3.x
2172    0.y, 1.y, 2.y, 3.y
2173    ....
2174 */
get_register_type(struct lp_build_nir_context * bld_base,nir_register * reg)2175 static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
2176                                      nir_register *reg)
2177 {
2178    struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size);
2179 
2180    LLVMTypeRef type = int_bld->vec_type;
2181    if (reg->num_array_elems)
2182       type = LLVMArrayType(type, reg->num_array_elems);
2183    if (reg->num_components > 1)
2184       type = LLVMArrayType(type, reg->num_components);
2185 
2186    return type;
2187 }
2188 
2189 
lp_build_nir_llvm(struct lp_build_nir_context * bld_base,struct nir_shader * nir)2190 bool lp_build_nir_llvm(
2191    struct lp_build_nir_context *bld_base,
2192    struct nir_shader *nir)
2193 {
2194    struct nir_function *func;
2195 
2196    nir_convert_from_ssa(nir, true);
2197    nir_lower_locals_to_regs(nir);
2198    nir_remove_dead_derefs(nir);
2199    nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
2200 
2201    nir_foreach_shader_out_variable(variable, nir)
2202       handle_shader_output_decl(bld_base, nir, variable);
2203 
2204    if (nir->info.io_lowered) {
2205       uint64_t outputs_written = nir->info.outputs_written;
2206 
2207       while (outputs_written) {
2208          unsigned location = u_bit_scan64(&outputs_written);
2209          nir_variable var = {0};
2210 
2211          var.type = glsl_vec4_type();
2212          var.data.mode = nir_var_shader_out;
2213          var.data.location = location;
2214          var.data.driver_location = util_bitcount64(nir->info.outputs_written &
2215                                                     BITFIELD64_MASK(location));
2216          bld_base->emit_var_decl(bld_base, &var);
2217       }
2218    }
2219 
2220    bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
2221                                             _mesa_key_pointer_equal);
2222    bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
2223                                             _mesa_key_pointer_equal);
2224 
2225    func = (struct nir_function *)exec_list_get_head(&nir->functions);
2226 
2227    nir_foreach_register(reg, &func->impl->registers) {
2228       LLVMTypeRef type = get_register_type(bld_base, reg);
2229       LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
2230                                                type, "reg");
2231       _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
2232    }
2233    nir_index_ssa_defs(func->impl);
2234    bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
2235    visit_cf_list(bld_base, &func->impl->body);
2236 
2237    free(bld_base->ssa_defs);
2238    ralloc_free(bld_base->vars);
2239    ralloc_free(bld_base->regs);
2240    return true;
2241 }
2242 
2243 /* do some basic opts to remove some things we don't want to see. */
lp_build_opt_nir(struct nir_shader * nir)2244 void lp_build_opt_nir(struct nir_shader *nir)
2245 {
2246    bool progress;
2247 
2248    static const struct nir_lower_tex_options lower_tex_options = {
2249       .lower_tg4_offsets = true,
2250    };
2251    NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
2252    NIR_PASS_V(nir, nir_lower_frexp);
2253 
2254    NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
2255 
2256    do {
2257       progress = false;
2258       NIR_PASS_V(nir, nir_opt_constant_folding);
2259       NIR_PASS_V(nir, nir_opt_algebraic);
2260       NIR_PASS_V(nir, nir_lower_pack);
2261 
2262       nir_lower_tex_options options = { .lower_tex_without_implicit_lod = true };
2263       NIR_PASS_V(nir, nir_lower_tex, &options);
2264    } while (progress);
2265    nir_lower_bool_to_int32(nir);
2266 }
2267