1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Helper functions for logical operations.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  */
34 
35 
36 #include "util/u_cpu_detect.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39 
40 #include "lp_bld_type.h"
41 #include "lp_bld_const.h"
42 #include "lp_bld_swizzle.h"
43 #include "lp_bld_init.h"
44 #include "lp_bld_intr.h"
45 #include "lp_bld_debug.h"
46 #include "lp_bld_logic.h"
47 
48 
49 /*
50  * XXX
51  *
52  * Selection with vector conditional like
53  *
54  *    select <4 x i1> %C, %A, %B
55  *
56  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
57  * supported on some backends (x86) starting with llvm 3.1.
58  *
59  * Expanding the boolean vector to full SIMD register width, as in
60  *
61  *    sext <4 x i1> %C to <4 x i32>
62  *
63  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
64  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
65  * LLVM 2.7.
66  */
67 
68 
69 /**
70  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
71  * \param func  one of PIPE_FUNC_x
72  * If the ordered argument is true the function will use LLVM's ordered
73  * comparisons, otherwise unordered comparisons will be used.
74  * The result values will be 0 for false or ~0 for true.
75  */
76 static LLVMValueRef
lp_build_compare_ext(struct gallivm_state * gallivm,const struct lp_type type,unsigned func,LLVMValueRef a,LLVMValueRef b,boolean ordered)77 lp_build_compare_ext(struct gallivm_state *gallivm,
78                      const struct lp_type type,
79                      unsigned func,
80                      LLVMValueRef a,
81                      LLVMValueRef b,
82                      boolean ordered)
83 {
84    LLVMBuilderRef builder = gallivm->builder;
85    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
86    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
87    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
88    LLVMValueRef cond;
89    LLVMValueRef res;
90 
91    assert(lp_check_value(type, a));
92    assert(lp_check_value(type, b));
93 
94    if(func == PIPE_FUNC_NEVER)
95       return zeros;
96    if(func == PIPE_FUNC_ALWAYS)
97       return ones;
98 
99    assert(func > PIPE_FUNC_NEVER);
100    assert(func < PIPE_FUNC_ALWAYS);
101 
102    if(type.floating) {
103       LLVMRealPredicate op;
104       switch(func) {
105       case PIPE_FUNC_EQUAL:
106          op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
107          break;
108       case PIPE_FUNC_NOTEQUAL:
109          op = ordered ? LLVMRealONE : LLVMRealUNE;
110          break;
111       case PIPE_FUNC_LESS:
112          op = ordered ? LLVMRealOLT : LLVMRealULT;
113          break;
114       case PIPE_FUNC_LEQUAL:
115          op = ordered ? LLVMRealOLE : LLVMRealULE;
116          break;
117       case PIPE_FUNC_GREATER:
118          op = ordered ? LLVMRealOGT : LLVMRealUGT;
119          break;
120       case PIPE_FUNC_GEQUAL:
121          op = ordered ? LLVMRealOGE : LLVMRealUGE;
122          break;
123       default:
124          assert(0);
125          return lp_build_undef(gallivm, type);
126       }
127 
128       cond = LLVMBuildFCmp(builder, op, a, b, "");
129       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
130    }
131    else {
132       LLVMIntPredicate op;
133       switch(func) {
134       case PIPE_FUNC_EQUAL:
135          op = LLVMIntEQ;
136          break;
137       case PIPE_FUNC_NOTEQUAL:
138          op = LLVMIntNE;
139          break;
140       case PIPE_FUNC_LESS:
141          op = type.sign ? LLVMIntSLT : LLVMIntULT;
142          break;
143       case PIPE_FUNC_LEQUAL:
144          op = type.sign ? LLVMIntSLE : LLVMIntULE;
145          break;
146       case PIPE_FUNC_GREATER:
147          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
148          break;
149       case PIPE_FUNC_GEQUAL:
150          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
151          break;
152       default:
153          assert(0);
154          return lp_build_undef(gallivm, type);
155       }
156 
157       cond = LLVMBuildICmp(builder, op, a, b, "");
158       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
159    }
160 
161    return res;
162 }
163 
164 /**
165  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
166  * \param func  one of PIPE_FUNC_x
167  * The result values will be 0 for false or ~0 for true.
168  */
169 LLVMValueRef
lp_build_compare(struct gallivm_state * gallivm,const struct lp_type type,unsigned func,LLVMValueRef a,LLVMValueRef b)170 lp_build_compare(struct gallivm_state *gallivm,
171                  const struct lp_type type,
172                  unsigned func,
173                  LLVMValueRef a,
174                  LLVMValueRef b)
175 {
176    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
177    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
178    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
179 
180    assert(lp_check_value(type, a));
181    assert(lp_check_value(type, b));
182 
183    if(func == PIPE_FUNC_NEVER)
184       return zeros;
185    if(func == PIPE_FUNC_ALWAYS)
186       return ones;
187 
188    assert(func > PIPE_FUNC_NEVER);
189    assert(func < PIPE_FUNC_ALWAYS);
190 
191 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
192    /*
193     * There are no unsigned integer comparison instructions in SSE.
194     */
195 
196    if (!type.floating && !type.sign &&
197        type.width * type.length == 128 &&
198        util_cpu_caps.has_sse2 &&
199        (func == PIPE_FUNC_LESS ||
200         func == PIPE_FUNC_LEQUAL ||
201         func == PIPE_FUNC_GREATER ||
202         func == PIPE_FUNC_GEQUAL) &&
203        (gallivm_debug & GALLIVM_DEBUG_PERF)) {
204          debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
205                       __FUNCTION__, type.length, type.width);
206    }
207 #endif
208 
209    return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
210 }
211 
212 /**
213  * Build code to compare two values 'a' and 'b' using the given func.
214  * \param func  one of PIPE_FUNC_x
215  * If the operands are floating point numbers, the function will use
216  * ordered comparison which means that it will return true if both
217  * operands are not a NaN and the specified condition evaluates to true.
218  * The result values will be 0 for false or ~0 for true.
219  */
220 LLVMValueRef
lp_build_cmp_ordered(struct lp_build_context * bld,unsigned func,LLVMValueRef a,LLVMValueRef b)221 lp_build_cmp_ordered(struct lp_build_context *bld,
222                      unsigned func,
223                      LLVMValueRef a,
224                      LLVMValueRef b)
225 {
226    return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
227 }
228 
229 /**
230  * Build code to compare two values 'a' and 'b' using the given func.
231  * \param func  one of PIPE_FUNC_x
232  * If the operands are floating point numbers, the function will use
233  * unordered comparison which means that it will return true if either
234  * operand is a NaN or the specified condition evaluates to true.
235  * The result values will be 0 for false or ~0 for true.
236  */
237 LLVMValueRef
lp_build_cmp(struct lp_build_context * bld,unsigned func,LLVMValueRef a,LLVMValueRef b)238 lp_build_cmp(struct lp_build_context *bld,
239              unsigned func,
240              LLVMValueRef a,
241              LLVMValueRef b)
242 {
243    return lp_build_compare(bld->gallivm, bld->type, func, a, b);
244 }
245 
246 
247 /**
248  * Return (mask & a) | (~mask & b);
249  */
250 LLVMValueRef
lp_build_select_bitwise(struct lp_build_context * bld,LLVMValueRef mask,LLVMValueRef a,LLVMValueRef b)251 lp_build_select_bitwise(struct lp_build_context *bld,
252                         LLVMValueRef mask,
253                         LLVMValueRef a,
254                         LLVMValueRef b)
255 {
256    LLVMBuilderRef builder = bld->gallivm->builder;
257    struct lp_type type = bld->type;
258    LLVMValueRef res;
259 
260    assert(lp_check_value(type, a));
261    assert(lp_check_value(type, b));
262 
263    if (a == b) {
264       return a;
265    }
266 
267    if(type.floating) {
268       LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
269       a = LLVMBuildBitCast(builder, a, int_vec_type, "");
270       b = LLVMBuildBitCast(builder, b, int_vec_type, "");
271    }
272 
273    a = LLVMBuildAnd(builder, a, mask, "");
274 
275    /* This often gets translated to PANDN, but sometimes the NOT is
276     * pre-computed and stored in another constant. The best strategy depends
277     * on available registers, so it is not a big deal -- hopefully LLVM does
278     * the right decision attending the rest of the program.
279     */
280    b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
281 
282    res = LLVMBuildOr(builder, a, b, "");
283 
284    if(type.floating) {
285       LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
286       res = LLVMBuildBitCast(builder, res, vec_type, "");
287    }
288 
289    return res;
290 }
291 
292 
293 /**
294  * Return mask ? a : b;
295  *
296  * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
297  * will yield unpredictable results.
298  */
299 LLVMValueRef
lp_build_select(struct lp_build_context * bld,LLVMValueRef mask,LLVMValueRef a,LLVMValueRef b)300 lp_build_select(struct lp_build_context *bld,
301                 LLVMValueRef mask,
302                 LLVMValueRef a,
303                 LLVMValueRef b)
304 {
305    LLVMBuilderRef builder = bld->gallivm->builder;
306    LLVMContextRef lc = bld->gallivm->context;
307    struct lp_type type = bld->type;
308    LLVMValueRef res;
309 
310    assert(lp_check_value(type, a));
311    assert(lp_check_value(type, b));
312 
313    if(a == b)
314       return a;
315 
316    if (type.length == 1) {
317       mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
318       res = LLVMBuildSelect(builder, mask, a, b, "");
319    }
320    else if (!(HAVE_LLVM == 0x0307) &&
321             (LLVMIsConstant(mask) ||
322              LLVMGetInstructionOpcode(mask) == LLVMSExt)) {
323       /* Generate a vector select.
324        *
325        * Using vector selects should avoid emitting intrinsics hence avoid
326        * hindering optimization passes, but vector selects weren't properly
327        * supported yet for a long time, and LLVM will generate poor code when
328        * the mask is not the result of a comparison.
329        * Also, llvm 3.7 may miscompile them (bug 94972).
330        * XXX: Even if the instruction was an SExt, this may still produce
331        * terrible code. Try piglit stencil-twoside.
332        */
333 
334       /* Convert the mask to a vector of booleans.
335        *
336        * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
337        * mask by `type.width - 1`, LLVM should realize the mask is ready.  Alas
338        * what really happens is that LLVM will emit two shifts back to back.
339        */
340       if (0) {
341          LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
342          shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
343          mask = LLVMBuildLShr(builder, mask, shift, "");
344       }
345       LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
346       mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
347 
348       res = LLVMBuildSelect(builder, mask, a, b, "");
349    }
350    else if (((util_cpu_caps.has_sse4_1 &&
351               type.width * type.length == 128) ||
352              (util_cpu_caps.has_avx &&
353               type.width * type.length == 256 && type.width >= 32) ||
354              (util_cpu_caps.has_avx2 &&
355               type.width * type.length == 256)) &&
356             !LLVMIsConstant(a) &&
357             !LLVMIsConstant(b) &&
358             !LLVMIsConstant(mask)) {
359       const char *intrinsic;
360       LLVMTypeRef arg_type;
361       LLVMValueRef args[3];
362 
363       /*
364        *  There's only float blend in AVX but can just cast i32/i64
365        *  to float.
366        */
367       if (type.width * type.length == 256) {
368          if (type.width == 64) {
369            intrinsic = "llvm.x86.avx.blendv.pd.256";
370            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
371          }
372          else if (type.width == 32) {
373             intrinsic = "llvm.x86.avx.blendv.ps.256";
374             arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
375          } else {
376             assert(util_cpu_caps.has_avx2);
377             intrinsic = "llvm.x86.avx2.pblendvb";
378             arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
379          }
380       }
381       else if (type.floating &&
382                type.width == 64) {
383          intrinsic = "llvm.x86.sse41.blendvpd";
384          arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
385       } else if (type.floating &&
386                  type.width == 32) {
387          intrinsic = "llvm.x86.sse41.blendvps";
388          arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
389       } else {
390          intrinsic = "llvm.x86.sse41.pblendvb";
391          arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
392       }
393 
394       if (arg_type != bld->int_vec_type) {
395          mask = LLVMBuildBitCast(builder, mask, arg_type, "");
396       }
397 
398       if (arg_type != bld->vec_type) {
399          a = LLVMBuildBitCast(builder, a, arg_type, "");
400          b = LLVMBuildBitCast(builder, b, arg_type, "");
401       }
402 
403       args[0] = b;
404       args[1] = a;
405       args[2] = mask;
406 
407       res = lp_build_intrinsic(builder, intrinsic,
408                                arg_type, args, ARRAY_SIZE(args), 0);
409 
410       if (arg_type != bld->vec_type) {
411          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
412       }
413    }
414    else {
415       res = lp_build_select_bitwise(bld, mask, a, b);
416    }
417 
418    return res;
419 }
420 
421 
422 /**
423  * Return mask ? a : b;
424  *
425  * mask is a TGSI_WRITEMASK_xxx.
426  */
427 LLVMValueRef
lp_build_select_aos(struct lp_build_context * bld,unsigned mask,LLVMValueRef a,LLVMValueRef b,unsigned num_channels)428 lp_build_select_aos(struct lp_build_context *bld,
429                     unsigned mask,
430                     LLVMValueRef a,
431                     LLVMValueRef b,
432                     unsigned num_channels)
433 {
434    LLVMBuilderRef builder = bld->gallivm->builder;
435    const struct lp_type type = bld->type;
436    const unsigned n = type.length;
437    unsigned i, j;
438 
439    assert((mask & ~0xf) == 0);
440    assert(lp_check_value(type, a));
441    assert(lp_check_value(type, b));
442 
443    if(a == b)
444       return a;
445    if((mask & 0xf) == 0xf)
446       return a;
447    if((mask & 0xf) == 0x0)
448       return b;
449    if(a == bld->undef || b == bld->undef)
450       return bld->undef;
451 
452    /*
453     * There are two major ways of accomplishing this:
454     * - with a shuffle
455     * - with a select
456     *
457     * The flip between these is empirical and might need to be adjusted.
458     */
459    if (n <= 4) {
460       /*
461        * Shuffle.
462        */
463       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
464       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
465 
466       for(j = 0; j < n; j += num_channels)
467          for(i = 0; i < num_channels; ++i)
468             shuffles[j + i] = LLVMConstInt(elem_type,
469                                            (mask & (1 << i) ? 0 : n) + j + i,
470                                            0);
471 
472       return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
473    }
474    else {
475       LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
476       return lp_build_select(bld, mask_vec, a, b);
477    }
478 }
479 
480 
481 /**
482  * Return (scalar-cast)val ? true : false;
483  */
484 LLVMValueRef
lp_build_any_true_range(struct lp_build_context * bld,unsigned real_length,LLVMValueRef val)485 lp_build_any_true_range(struct lp_build_context *bld,
486                         unsigned real_length,
487                         LLVMValueRef val)
488 {
489    LLVMBuilderRef builder = bld->gallivm->builder;
490    LLVMTypeRef scalar_type;
491    LLVMTypeRef true_type;
492 
493    assert(real_length <= bld->type.length);
494 
495    true_type = LLVMIntTypeInContext(bld->gallivm->context,
496                                     bld->type.width * real_length);
497    scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
498                                       bld->type.width * bld->type.length);
499    val = LLVMBuildBitCast(builder, val, scalar_type, "");
500    /*
501     * We're using always native types so we can use intrinsics.
502     * However, if we don't do per-element calculations, we must ensure
503     * the excess elements aren't used since they may contain garbage.
504     */
505    if (real_length < bld->type.length) {
506       val = LLVMBuildTrunc(builder, val, true_type, "");
507    }
508    return LLVMBuildICmp(builder, LLVMIntNE,
509                         val, LLVMConstNull(true_type), "");
510 }
511