1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Helper arithmetic functions.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  */
34 
35 
36 #ifndef LP_BLD_ARIT_H
37 #define LP_BLD_ARIT_H
38 
39 
40 #include "gallivm/lp_bld.h"
41 #include "pipe/p_compiler.h"
42 
43 
44 struct lp_type;
45 struct lp_build_context;
46 struct gallivm_state;
47 
48 
49 /**
50  * Complement, i.e., 1 - a.
 *
 * @param bld  build context
 * @param a    value to complement
 * @return     value for 1 - a
51  */
52 LLVMValueRef
53 lp_build_comp(struct lp_build_context *bld,
54               LLVMValueRef a);
55 
56 LLVMValueRef
57 lp_build_add(struct lp_build_context *bld,
58              LLVMValueRef a,
59              LLVMValueRef b);
60 
61 LLVMValueRef
62 lp_build_horizontal_add(struct lp_build_context *bld,
63                         LLVMValueRef a);
64 
65 LLVMValueRef
66 lp_build_hadd_partial4(struct lp_build_context *bld,
67                        LLVMValueRef vectors[],
68                        unsigned num_vecs);
69 
70 LLVMValueRef
71 lp_build_sub(struct lp_build_context *bld,
72              LLVMValueRef a,
73              LLVMValueRef b);
74 
75 
76 LLVMValueRef
77 lp_build_mul_norm(struct gallivm_state *gallivm,
78                   struct lp_type wide_type,
79                   LLVMValueRef a,
80                   LLVMValueRef b);
81 
82 LLVMValueRef
83 lp_build_mul(struct lp_build_context *bld,
84              LLVMValueRef a,
85              LLVMValueRef b);
86 
87 LLVMValueRef
88 lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
89                          LLVMValueRef a,
90                          LLVMValueRef b,
91                          LLVMValueRef *res_hi);
92 
/**
 * NOTE(review): judging by the name and the res_hi pointer, this presumably
 * builds a 32-bit multiply with a double-width result, returning the low
 * half and storing the high half through res_hi -- confirm against the
 * implementation.
 */
93 LLVMValueRef
94 lp_build_mul_32_lohi(struct lp_build_context *bld,
95                      LLVMValueRef a,
96                      LLVMValueRef b,
97                      LLVMValueRef *res_hi);
98 
99 LLVMValueRef
100 lp_build_mul_imm(struct lp_build_context *bld,
101                  LLVMValueRef a,
102                  int b);
103 
104 LLVMValueRef
105 lp_build_div(struct lp_build_context *bld,
106              LLVMValueRef a,
107              LLVMValueRef b);
108 
109 
110 /**
 * llvm.fmuladd.* intrinsic.
 *
 * Unlike most declarations in this header, this takes an LLVMBuilderRef
 * directly rather than a struct lp_build_context.
 *
 * NOTE(review): per the LLVM language reference, llvm.fmuladd computes
 * a * b + c and may or may not fuse the two operations; the operands here
 * presumably map to that expression in order -- confirm against the
 * implementation.
 */
111 LLVMValueRef
112 lp_build_fmuladd(LLVMBuilderRef builder,
113                  LLVMValueRef a,
114                  LLVMValueRef b,
115                  LLVMValueRef c);
116 
117 /**
 * Multiply-add, i.e., a * b + c.
 *
 * @param bld  build context
 * @param a    first factor
 * @param b    second factor
 * @param c    addend
 * @return     value for a * b + c
 */
118 LLVMValueRef
119 lp_build_mad(struct lp_build_context *bld,
120              LLVMValueRef a,
121              LLVMValueRef b,
122              LLVMValueRef c);
123 
124 
125 /**
126  * Set when the weights for normalized lerps are prescaled, that is, in
127  * range 0..2**n, as opposed to range 0..2**(n-1).
 *
 * NOTE(review): the unscaled range is written as 0..2**(n-1); for n-bit
 * normalized values 0..(2**n)-1 looks intended -- confirm against the
 * lerp implementation before relying on either reading.
 *
 * Occupies bit 0; presumably passed through the flags argument of the
 * lp_build_lerp*() functions.
128  */
129 #define LP_BLD_LERP_PRESCALED_WEIGHTS (1 << 0)
130 
131 /**
132  * Internal flag, used when wide intermediates are in use for normalized
133  * lerps.
134  *
 * Reserved for internal use: other code should not set this bit (bit 1).
135  */
136 #define LP_BLD_LERP_WIDE_NORMALIZED (1 << 1)
137 
/**
 * NOTE(review): operand roles are not visible in this header; presumably
 * x is the weight and the result is v0 + x * (v1 - v0) -- confirm against
 * the implementation.
 *
 * @param flags  presumably a bitmask of LP_BLD_LERP_* values -- confirm
 */
138 LLVMValueRef
139 lp_build_lerp(struct lp_build_context *bld,
140               LLVMValueRef x,
141               LLVMValueRef v0,
142               LLVMValueRef v1,
143               unsigned flags);
144 
145 LLVMValueRef
146 lp_build_lerp_2d(struct lp_build_context *bld,
147                  LLVMValueRef x,
148                  LLVMValueRef y,
149                  LLVMValueRef v00,
150                  LLVMValueRef v01,
151                  LLVMValueRef v10,
152                  LLVMValueRef v11,
153                  unsigned flags);
154 
155 LLVMValueRef
156 lp_build_lerp_3d(struct lp_build_context *bld,
157                  LLVMValueRef x,
158                  LLVMValueRef y,
159                  LLVMValueRef z,
160                  LLVMValueRef v000,
161                  LLVMValueRef v001,
162                  LLVMValueRef v010,
163                  LLVMValueRef v011,
164                  LLVMValueRef v100,
165                  LLVMValueRef v101,
166                  LLVMValueRef v110,
167                  LLVMValueRef v111,
168                  unsigned flags);
169 
170 /**
171  * Specifies floating point NaN behavior.
 *
 * This is the type of the nan_behavior argument of lp_build_min_ext() and
 * lp_build_max_ext(). The "first"/"second" operand wording below
 * presumably refers to their a and b arguments respectively -- confirm
 * against the implementation.
172  */
173 enum gallivm_nan_behavior {
174    /* Results are undefined if an input is NaN. Gives the fastest code. */
175    GALLIVM_NAN_BEHAVIOR_UNDEFINED,
176    /* If one of the inputs is NaN, NaN is returned. */
177    GALLIVM_NAN_RETURN_NAN,
178    /* If one of the inputs is NaN, the other operand is returned. */
179    GALLIVM_NAN_RETURN_OTHER,
180    /* If one of the inputs is NaN, the other operand is returned,
181     * but we guarantee the second operand is not a NaN.
182     * In min/max it will be as fast as undefined with sse opcodes,
183     * and archs having native return_other can benefit too. */
184    GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN,
185    /* If one of the inputs is NaN, NaN is returned,
186     * but we guarantee the first operand is not a NaN.
187     * In min/max it will be as fast as undefined with sse opcodes,
188     * and archs having native return_nan can benefit too. */
189    GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN,
190 
191 };
192 
193 LLVMValueRef
194 lp_build_min(struct lp_build_context *bld,
195              LLVMValueRef a,
196              LLVMValueRef b);
197 
/**
 * Min with a caller-selected NaN policy.
 *
 * NOTE(review): the result for non-NaN inputs is not visible in this
 * header; presumably min(a, b) -- confirm against the implementation.
 *
 * @param nan_behavior  NaN handling policy, see enum gallivm_nan_behavior
 */
198 LLVMValueRef
199 lp_build_min_ext(struct lp_build_context *bld,
200                  LLVMValueRef a,
201                  LLVMValueRef b,
202                  enum gallivm_nan_behavior nan_behavior);
203 
204 LLVMValueRef
205 lp_build_max(struct lp_build_context *bld,
206              LLVMValueRef a,
207              LLVMValueRef b);
208 
/**
 * Max with a caller-selected NaN policy.
 *
 * NOTE(review): the result for non-NaN inputs is not visible in this
 * header; presumably max(a, b) -- confirm against the implementation.
 *
 * @param nan_behavior  NaN handling policy, see enum gallivm_nan_behavior
 */
209 LLVMValueRef
210 lp_build_max_ext(struct lp_build_context *bld,
211                  LLVMValueRef a,
212                  LLVMValueRef b,
213                  enum gallivm_nan_behavior nan_behavior);
214 
215 LLVMValueRef
216 lp_build_clamp(struct lp_build_context *bld,
217                LLVMValueRef a,
218                LLVMValueRef min,
219                LLVMValueRef max);
220 
221 LLVMValueRef
222 lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
223                                 LLVMValueRef a);
224 
225 LLVMValueRef
226 lp_build_abs(struct lp_build_context *bld,
227              LLVMValueRef a);
228 
229 LLVMValueRef
230 lp_build_negate(struct lp_build_context *bld,
231                 LLVMValueRef a);
232 
233 LLVMValueRef
234 lp_build_sgn(struct lp_build_context *bld,
235              LLVMValueRef a);
236 
237 LLVMValueRef
238 lp_build_set_sign(struct lp_build_context *bld,
239                   LLVMValueRef a, LLVMValueRef sign);
240 
241 LLVMValueRef
242 lp_build_int_to_float(struct lp_build_context *bld,
243                       LLVMValueRef a);
244 
245 LLVMValueRef
246 lp_build_round(struct lp_build_context *bld,
247                LLVMValueRef a);
248 
249 LLVMValueRef
250 lp_build_floor(struct lp_build_context *bld,
251                LLVMValueRef a);
252 
253 LLVMValueRef
254 lp_build_ceil(struct lp_build_context *bld,
255               LLVMValueRef a);
256 
257 LLVMValueRef
258 lp_build_trunc(struct lp_build_context *bld,
259                LLVMValueRef a);
260 
261 LLVMValueRef
262 lp_build_fract(struct lp_build_context *bld,
263                LLVMValueRef a);
264 
265 LLVMValueRef
266 lp_build_fract_safe(struct lp_build_context *bld,
267                     LLVMValueRef a);
268 
269 LLVMValueRef
270 lp_build_ifloor(struct lp_build_context *bld,
271                 LLVMValueRef a);
272 LLVMValueRef
273 lp_build_iceil(struct lp_build_context *bld,
274                LLVMValueRef a);
275 
276 LLVMValueRef
277 lp_build_iround(struct lp_build_context *bld,
278                 LLVMValueRef a);
279 
280 LLVMValueRef
281 lp_build_itrunc(struct lp_build_context *bld,
282                 LLVMValueRef a);
283 
/**
 * Returns nothing directly; takes two LLVMValueRef pointers, out_ipart and
 * out_fpart.
 *
 * NOTE(review): presumably *out_ipart receives the integer floor of a and
 * *out_fpart the matching fractional part -- confirm against the
 * implementation.
 */
284 void
285 lp_build_ifloor_fract(struct lp_build_context *bld,
286                       LLVMValueRef a,
287                       LLVMValueRef *out_ipart,
288                       LLVMValueRef *out_fpart);
289 
290 void
291 lp_build_ifloor_fract_safe(struct lp_build_context *bld,
292                            LLVMValueRef a,
293                            LLVMValueRef *out_ipart,
294                            LLVMValueRef *out_fpart);
295 
296 LLVMValueRef
297 lp_build_sqrt(struct lp_build_context *bld,
298               LLVMValueRef a);
299 
300 LLVMValueRef
301 lp_build_rcp(struct lp_build_context *bld,
302              LLVMValueRef a);
303 
304 LLVMValueRef
305 lp_build_rsqrt(struct lp_build_context *bld,
306                LLVMValueRef a);
307 
308 boolean
309 lp_build_fast_rsqrt_available(struct lp_type type);
310 
311 LLVMValueRef
312 lp_build_fast_rsqrt(struct lp_build_context *bld,
313                     LLVMValueRef a);
314 
315 LLVMValueRef
316 lp_build_polynomial(struct lp_build_context *bld,
317                     LLVMValueRef x,
318                     const double *coeffs,
319                     unsigned num_coeffs);
320 
321 LLVMValueRef
322 lp_build_cos(struct lp_build_context *bld,
323              LLVMValueRef a);
324 
325 LLVMValueRef
326 lp_build_sin(struct lp_build_context *bld,
327              LLVMValueRef a);
328 
329 LLVMValueRef
330 lp_build_pow(struct lp_build_context *bld,
331              LLVMValueRef a,
332              LLVMValueRef b);
333 
334 LLVMValueRef
335 lp_build_exp(struct lp_build_context *bld,
336              LLVMValueRef a);
337 
338 LLVMValueRef
339 lp_build_log(struct lp_build_context *bld,
340              LLVMValueRef a);
341 
342 LLVMValueRef
343 lp_build_log_safe(struct lp_build_context *bld,
344                   LLVMValueRef a);
345 
346 LLVMValueRef
347 lp_build_exp2(struct lp_build_context *bld,
348               LLVMValueRef a);
349 
350 LLVMValueRef
351 lp_build_extract_exponent(struct lp_build_context *bld,
352                           LLVMValueRef x,
353                           int bias);
354 
355 LLVMValueRef
356 lp_build_extract_mantissa(struct lp_build_context *bld,
357                           LLVMValueRef x);
358 
359 LLVMValueRef
360 lp_build_log2(struct lp_build_context *bld,
361               LLVMValueRef a);
362 
363 LLVMValueRef
364 lp_build_log2_safe(struct lp_build_context *bld,
365                    LLVMValueRef a);
366 
367 LLVMValueRef
368 lp_build_fast_log2(struct lp_build_context *bld,
369                    LLVMValueRef a);
370 
371 LLVMValueRef
372 lp_build_ilog2(struct lp_build_context *bld,
373                LLVMValueRef x);
374 
375 void
376 lp_build_log2_approx(struct lp_build_context *bld,
377                      LLVMValueRef x,
378                      LLVMValueRef *p_exp,
379                      LLVMValueRef *p_floor_log2,
380                      LLVMValueRef *p_log2,
381                      boolean handle_nans);
382 
383 LLVMValueRef
384 lp_build_mod(struct lp_build_context *bld,
385              LLVMValueRef x,
386              LLVMValueRef y);
387 
388 LLVMValueRef
389 lp_build_isnan(struct lp_build_context *bld,
390                LLVMValueRef x);
391 
392 LLVMValueRef
393 lp_build_isfinite(struct lp_build_context *bld,
394                   LLVMValueRef x);
395 
396 
397 LLVMValueRef
398 lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
399                        const struct lp_type type,
400                        LLVMValueRef x);
401 
402 
403 LLVMValueRef
404 lp_build_fpstate_get(struct gallivm_state *gallivm);
405 
406 void
407 lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
408                                   boolean zero);
409 void
410 lp_build_fpstate_set(struct gallivm_state *gallivm,
411                      LLVMValueRef mxcsr);
412 
413 #endif /* !LP_BLD_ARIT_H */
414