1 #include "rs_core.rsh"
2 #include "rs_f16_util.h"
3 
4 extern float2 __attribute__((overloadable)) convert_float2(int2 c);
5 extern float3 __attribute__((overloadable)) convert_float3(int3 c);
6 extern float4 __attribute__((overloadable)) convert_float4(int4 c);
7 
8 extern int2 __attribute__((overloadable)) convert_int2(float2 c);
9 extern int3 __attribute__((overloadable)) convert_int3(float3 c);
10 extern int4 __attribute__((overloadable)) convert_int4(float4 c);
11 
12 
13 extern float __attribute__((overloadable)) fmin(float v, float v2);
14 extern float2 __attribute__((overloadable)) fmin(float2 v, float v2);
15 extern float3 __attribute__((overloadable)) fmin(float3 v, float v2);
16 extern float4 __attribute__((overloadable)) fmin(float4 v, float v2);
17 
18 extern float __attribute__((overloadable)) fmax(float v, float v2);
19 extern float2 __attribute__((overloadable)) fmax(float2 v, float v2);
20 extern float3 __attribute__((overloadable)) fmax(float3 v, float v2);
21 extern float4 __attribute__((overloadable)) fmax(float4 v, float v2);
22 
// Float ops (section 6.11.2 -- presumably the math-functions section of the OpenCL C specification)
24 
/*
 * FN_FUNC_FN(fnc): emits the float2, float3 and float4 overloads of a
 * one-argument function. Each overload calls the scalar fnc once per
 * component, in x, y, z, w order, and returns the results packed into a
 * vector of the same width.
 */
#define FN_FUNC_FN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v) { \
    const float rx = fnc(v.x); \
    const float ry = fnc(v.y); \
    float2 out = { rx, ry }; \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v) { \
    const float rx = fnc(v.x); \
    const float ry = fnc(v.y); \
    const float rz = fnc(v.z); \
    float3 out = { rx, ry, rz }; \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v) { \
    const float rx = fnc(v.x); \
    const float ry = fnc(v.y); \
    const float rz = fnc(v.z); \
    const float rw = fnc(v.w); \
    float4 out = { rx, ry, rz, rw }; \
    return out; \
}
47 
/*
 * IN_FUNC_FN(fnc): emits the overloads of fnc that take a float2, float3 or
 * float4 and return an int2, int3 or int4. The scalar fnc is called once
 * per component, in x, y, z, w order.
 */
#define IN_FUNC_FN(fnc) \
extern int2 __attribute__((overloadable)) fnc(float2 v) { \
    const int rx = fnc(v.x); \
    const int ry = fnc(v.y); \
    int2 out = { rx, ry }; \
    return out; \
} \
extern int3 __attribute__((overloadable)) fnc(float3 v) { \
    const int rx = fnc(v.x); \
    const int ry = fnc(v.y); \
    const int rz = fnc(v.z); \
    int3 out = { rx, ry, rz }; \
    return out; \
} \
extern int4 __attribute__((overloadable)) fnc(float4 v) { \
    const int rx = fnc(v.x); \
    const int ry = fnc(v.y); \
    const int rz = fnc(v.z); \
    const int rw = fnc(v.w); \
    int4 out = { rx, ry, rz, rw }; \
    return out; \
}
70 
/*
 * FN_FUNC_FN_FN(fnc): emits the float2, float3 and float4 overloads of a
 * two-argument function. Component i of the result is the scalar
 * fnc(v1[i], v2[i]); components are evaluated in x, y, z, w order.
 */
#define FN_FUNC_FN_FN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \
    const float rx = fnc(v1.x, v2.x); \
    const float ry = fnc(v1.y, v2.y); \
    float2 out = { rx, ry }; \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \
    const float rx = fnc(v1.x, v2.x); \
    const float ry = fnc(v1.y, v2.y); \
    const float rz = fnc(v1.z, v2.z); \
    float3 out = { rx, ry, rz }; \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \
    const float rx = fnc(v1.x, v2.x); \
    const float ry = fnc(v1.y, v2.y); \
    const float rz = fnc(v1.z, v2.z); \
    const float rw = fnc(v1.w, v2.w); \
    float4 out = { rx, ry, rz, rw }; \
    return out; \
}
93 
/*
 * FN_FUNC_FN_F(fnc): emits the overloads of fnc that take a float vector
 * (float2/float3/float4) and a single scalar float. The same scalar v2 is
 * passed to the scalar fnc for every component of v1.
 */
#define FN_FUNC_FN_F(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, float v2) { \
    const float rx = fnc(v1.x, v2); \
    const float ry = fnc(v1.y, v2); \
    float2 out = { rx, ry }; \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, float v2) { \
    const float rx = fnc(v1.x, v2); \
    const float ry = fnc(v1.y, v2); \
    const float rz = fnc(v1.z, v2); \
    float3 out = { rx, ry, rz }; \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, float v2) { \
    const float rx = fnc(v1.x, v2); \
    const float ry = fnc(v1.y, v2); \
    const float rz = fnc(v1.z, v2); \
    const float rw = fnc(v1.w, v2); \
    float4 out = { rx, ry, rz, rw }; \
    return out; \
}
116 
/*
 * FN_FUNC_FN_IN(fnc): emits the overloads of fnc that take a float vector
 * and an int vector of the same width (2, 3 or 4). Component i of the
 * result is the scalar fnc(v1[i], v2[i]).
 */
#define FN_FUNC_FN_IN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) { \
    const float rx = fnc(v1.x, v2.x); \
    const float ry = fnc(v1.y, v2.y); \
    float2 out = { rx, ry }; \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) { \
    const float rx = fnc(v1.x, v2.x); \
    const float ry = fnc(v1.y, v2.y); \
    const float rz = fnc(v1.z, v2.z); \
    float3 out = { rx, ry, rz }; \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) { \
    const float rx = fnc(v1.x, v2.x); \
    const float ry = fnc(v1.y, v2.y); \
    const float rz = fnc(v1.z, v2.z); \
    const float rw = fnc(v1.w, v2.w); \
    float4 out = { rx, ry, rz, rw }; \
    return out; \
}
139 
/*
 * FN_FUNC_FN_I(fnc): emits the overloads of fnc that take a float vector
 * (float2/float3/float4) and a single scalar int. The same int v2 is passed
 * to the scalar fnc for every component of v1.
 */
#define FN_FUNC_FN_I(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, int v2) { \
    const float rx = fnc(v1.x, v2); \
    const float ry = fnc(v1.y, v2); \
    float2 out = { rx, ry }; \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, int v2) { \
    const float rx = fnc(v1.x, v2); \
    const float ry = fnc(v1.y, v2); \
    const float rz = fnc(v1.z, v2); \
    float3 out = { rx, ry, rz }; \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, int v2) { \
    const float rx = fnc(v1.x, v2); \
    const float ry = fnc(v1.y, v2); \
    const float rz = fnc(v1.z, v2); \
    const float rw = fnc(v1.w, v2); \
    float4 out = { rx, ry, rz, rw }; \
    return out; \
}
162 
/*
 * FN_FUNC_FN_PFN(fnc): emits the overloads of fnc that take a float vector
 * and a pointer to a float vector of the same width used as a second
 * output. The scalar fnc writes each secondary result into a local scalar;
 * once every component has been computed, the locals are copied into *v2.
 */
#define FN_FUNC_FN_PFN(fnc) \
extern float2 __attribute__((overloadable)) \
        fnc(float2 v1, float2 *v2) { \
    float px, py; \
    const float rx = fnc(v1.x, &px); \
    const float ry = fnc(v1.y, &py); \
    v2->x = px; \
    v2->y = py; \
    float2 out = { rx, ry }; \
    return out; \
} \
extern float3 __attribute__((overloadable)) \
        fnc(float3 v1, float3 *v2) { \
    float px, py, pz; \
    const float rx = fnc(v1.x, &px); \
    const float ry = fnc(v1.y, &py); \
    const float rz = fnc(v1.z, &pz); \
    v2->x = px; \
    v2->y = py; \
    v2->z = pz; \
    float3 out = { rx, ry, rz }; \
    return out; \
} \
extern float4 __attribute__((overloadable)) \
        fnc(float4 v1, float4 *v2) { \
    float px, py, pz, pw; \
    const float rx = fnc(v1.x, &px); \
    const float ry = fnc(v1.y, &py); \
    const float rz = fnc(v1.z, &pz); \
    const float rw = fnc(v1.w, &pw); \
    v2->x = px; \
    v2->y = py; \
    v2->z = pz; \
    v2->w = pw; \
    float4 out = { rx, ry, rz, rw }; \
    return out; \
}
200 
/*
 * FN_FUNC_FN_PIN(fnc): emits the overloads of fnc that take a float vector
 * and a pointer to an int vector of the same width used as a second output.
 * The scalar fnc writes each int result into a local scalar; once every
 * component has been computed, the locals are copied into *v2.
 */
#define FN_FUNC_FN_PIN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) { \
    int px, py; \
    const float rx = fnc(v1.x, &px); \
    const float ry = fnc(v1.y, &py); \
    v2->x = px; \
    v2->y = py; \
    float2 out = { rx, ry }; \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) { \
    int px, py, pz; \
    const float rx = fnc(v1.x, &px); \
    const float ry = fnc(v1.y, &py); \
    const float rz = fnc(v1.z, &pz); \
    v2->x = px; \
    v2->y = py; \
    v2->z = pz; \
    float3 out = { rx, ry, rz }; \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) { \
    int px, py, pz, pw; \
    const float rx = fnc(v1.x, &px); \
    const float ry = fnc(v1.y, &py); \
    const float rz = fnc(v1.z, &pz); \
    const float rw = fnc(v1.w, &pw); \
    v2->x = px; \
    v2->y = py; \
    v2->z = pz; \
    v2->w = pw; \
    float4 out = { rx, ry, rz, rw }; \
    return out; \
}
235 
/*
 * FN_FUNC_FN_FN_FN(fnc): emits the float2, float3 and float4 overloads of a
 * three-argument function. Component i of the result is the scalar
 * fnc(v1[i], v2[i], v3[i]); components are evaluated in x, y, z, w order.
 */
#define FN_FUNC_FN_FN_FN(fnc) \
extern float2 __attribute__((overloadable)) \
        fnc(float2 v1, float2 v2, float2 v3) { \
    const float rx = fnc(v1.x, v2.x, v3.x); \
    const float ry = fnc(v1.y, v2.y, v3.y); \
    float2 out = { rx, ry }; \
    return out; \
} \
extern float3 __attribute__((overloadable)) \
        fnc(float3 v1, float3 v2, float3 v3) { \
    const float rx = fnc(v1.x, v2.x, v3.x); \
    const float ry = fnc(v1.y, v2.y, v3.y); \
    const float rz = fnc(v1.z, v2.z, v3.z); \
    float3 out = { rx, ry, rz }; \
    return out; \
} \
extern float4 __attribute__((overloadable)) \
        fnc(float4 v1, float4 v2, float4 v3) { \
    const float rx = fnc(v1.x, v2.x, v3.x); \
    const float ry = fnc(v1.y, v2.y, v3.y); \
    const float rz = fnc(v1.z, v2.z, v3.z); \
    const float rw = fnc(v1.w, v2.w, v3.w); \
    float4 out = { rx, ry, rz, rw }; \
    return out; \
}
261 
/*
 * FN_FUNC_FN_FN_PIN(fnc): emits the overloads of fnc that take two float
 * vectors and a pointer to an int vector of the same width used as a second
 * output. The scalar fnc writes each int result into a local scalar; once
 * every component has been computed, the locals are copied into *v3.
 */
#define FN_FUNC_FN_FN_PIN(fnc) \
extern float2 __attribute__((overloadable)) \
        fnc(float2 v1, float2 v2, int2 *v3) { \
    int px, py; \
    const float rx = fnc(v1.x, v2.x, &px); \
    const float ry = fnc(v1.y, v2.y, &py); \
    v3->x = px; \
    v3->y = py; \
    float2 out = { rx, ry }; \
    return out; \
} \
extern float3 __attribute__((overloadable)) \
        fnc(float3 v1, float3 v2, int3 *v3) { \
    int px, py, pz; \
    const float rx = fnc(v1.x, v2.x, &px); \
    const float ry = fnc(v1.y, v2.y, &py); \
    const float rz = fnc(v1.z, v2.z, &pz); \
    v3->x = px; \
    v3->y = py; \
    v3->z = pz; \
    float3 out = { rx, ry, rz }; \
    return out; \
} \
extern float4 __attribute__((overloadable)) \
        fnc(float4 v1, float4 v2, int4 *v3) { \
    int px, py, pz, pw; \
    const float rx = fnc(v1.x, v2.x, &px); \
    const float ry = fnc(v1.y, v2.y, &py); \
    const float rz = fnc(v1.z, v2.z, &pz); \
    const float rw = fnc(v1.w, v2.w, &pw); \
    v3->x = px; \
    v3->y = py; \
    v3->z = pz; \
    v3->w = pw; \
    float4 out = { rx, ry, rz, rw }; \
    return out; \
}
299 
/* 32-bit patterns compared against, and converted to, float infinities. */
static const int iposinf = 0x7f800000;
static const int ineginf = 0xff800000;

/*
 * Returns the object representation of f as an int. A union is used rather
 * than a pointer cast so the read does not violate the aliasing rules.
 * Assumes float and int have the same size.
 */
static int rs_cl_float_bits(float f) {
    union { float f; int i; } pun;
    pun.f = f;
    return pun.i;
}

/* Inverse of rs_cl_float_bits: reinterprets an int bit pattern as a float. */
static float rs_cl_bits_float(int bits) {
    union { int i; float f; } pun;
    pun.i = bits;
    return pun.f;
}

/* Returns the float whose bit pattern is iposinf. */
static float posinf(void) {
    return rs_cl_bits_float(iposinf);
}

/* Returns the float whose bit pattern is ineginf. */
static float neginf(void) {
    return rs_cl_bits_float(ineginf);
}

/* True when the bit pattern of f equals iposinf or ineginf. */
static bool isinf(float f) {
    const int bits = rs_cl_float_bits(f);
    return (bits == iposinf) || (bits == ineginf);
}

/* True when all exponent bits of f are set and the mantissa is non-zero. */
static bool isnan(float f) {
    const int bits = rs_cl_float_bits(f);
    const bool exponent_all_ones = (bits & 0x7f800000) == 0x7f800000;
    const bool mantissa_nonzero = (bits & 0x007fffff) != 0;
    return exponent_all_ones && mantissa_nonzero;
}

/* True when every bit of f is clear (+0.0f). */
static bool isposzero(float f) {
    return rs_cl_float_bits(f) == 0x00000000;
}

/* True when only the top (sign) bit of f is set (-0.0f). */
static bool isnegzero(float f) {
    /* Compare as unsigned so the constant and the bits have the same type. */
    return (unsigned int)rs_cl_float_bits(f) == 0x80000000u;
}

/* True for either signed zero. */
static bool iszero(float f) {
    return isposzero(f) || isnegzero(f);
}
336 
337 
338 extern float __attribute__((overloadable)) SC_acosf(float);
acos(float v)339 float __attribute__((overloadable)) acos(float v) {
340     return SC_acosf(v);
341 }
342 FN_FUNC_FN(acos)
343 
344 extern float __attribute__((overloadable)) SC_acoshf(float);
acosh(float v)345 float __attribute__((overloadable)) acosh(float v) {
346     return SC_acoshf(v);
347 }
FN_FUNC_FN(acosh)348 FN_FUNC_FN(acosh)
349 
350 
351 extern float __attribute__((overloadable)) acospi(float v) {
352     return acos(v) / M_PI;
353 }
354 FN_FUNC_FN(acospi)
355 
356 extern float __attribute__((overloadable)) SC_asinf(float);
asin(float v)357 float __attribute__((overloadable)) asin(float v) {
358     return SC_asinf(v);
359 }
360 FN_FUNC_FN(asin)
361 
362 extern float __attribute__((overloadable)) SC_asinhf(float);
asinh(float v)363 float __attribute__((overloadable)) asinh(float v) {
364     return SC_asinhf(v);
365 }
FN_FUNC_FN(asinh)366 FN_FUNC_FN(asinh)
367 
368 extern float __attribute__((overloadable)) asinpi(float v) {
369     return asin(v) / M_PI;
370 }
371 FN_FUNC_FN(asinpi)
372 
373 extern float __attribute__((overloadable)) SC_atanf(float);
atan(float v)374 float __attribute__((overloadable)) atan(float v) {
375     return SC_atanf(v);
376 }
377 FN_FUNC_FN(atan)
378 
379 extern float __attribute__((overloadable)) SC_atan2f(float, float);
atan2(float v1,float v2)380 float __attribute__((overloadable)) atan2(float v1, float v2) {
381     return SC_atan2f(v1, v2);
382 }
383 FN_FUNC_FN_FN(atan2)
384 
385 extern float __attribute__((overloadable)) SC_atanhf(float);
atanh(float v)386 float __attribute__((overloadable)) atanh(float v) {
387     return SC_atanhf(v);
388 }
FN_FUNC_FN(atanh)389 FN_FUNC_FN(atanh)
390 
391 extern float __attribute__((overloadable)) atanpi(float v) {
392     return atan(v) / M_PI;
393 }
FN_FUNC_FN(atanpi)394 FN_FUNC_FN(atanpi)
395 
396 
397 extern float __attribute__((overloadable)) atan2pi(float y, float x) {
398     return atan2(y, x) / M_PI;
399 }
400 FN_FUNC_FN_FN(atan2pi)
401 
402 extern float __attribute__((overloadable)) SC_cbrtf(float);
cbrt(float v)403 float __attribute__((overloadable)) cbrt(float v) {
404     return SC_cbrtf(v);
405 }
406 FN_FUNC_FN(cbrt)
407 
408 extern float __attribute__((overloadable)) SC_ceilf(float);
ceil(float v)409 float __attribute__((overloadable)) ceil(float v) {
410     return SC_ceilf(v);
411 }
412 FN_FUNC_FN(ceil)
413 
414 extern float __attribute__((overloadable)) SC_copysignf(float, float);
copysign(float v1,float v2)415 float __attribute__((overloadable)) copysign(float v1, float v2) {
416     return SC_copysignf(v1, v2);
417 }
418 FN_FUNC_FN_FN(copysign)
419 
420 extern float __attribute__((overloadable)) SC_cosf(float);
cos(float v)421 float __attribute__((overloadable)) cos(float v) {
422     return SC_cosf(v);
423 }
424 FN_FUNC_FN(cos)
425 
426 extern float __attribute__((overloadable)) SC_coshf(float);
cosh(float v)427 float __attribute__((overloadable)) cosh(float v) {
428     return SC_coshf(v);
429 }
FN_FUNC_FN(cosh)430 FN_FUNC_FN(cosh)
431 
432 extern float __attribute__((overloadable)) cospi(float v) {
433     return cos(v * M_PI);
434 }
435 FN_FUNC_FN(cospi)
436 
437 extern float __attribute__((overloadable)) SC_erfcf(float);
erfc(float v)438 float __attribute__((overloadable)) erfc(float v) {
439     return SC_erfcf(v);
440 }
441 FN_FUNC_FN(erfc)
442 
443 extern float __attribute__((overloadable)) SC_erff(float);
erf(float v)444 float __attribute__((overloadable)) erf(float v) {
445     return SC_erff(v);
446 }
447 FN_FUNC_FN(erf)
448 
449 extern float __attribute__((overloadable)) SC_expf(float);
exp(float v)450 float __attribute__((overloadable)) exp(float v) {
451     return SC_expf(v);
452 }
453 FN_FUNC_FN(exp)
454 
455 extern float __attribute__((overloadable)) SC_exp2f(float);
exp2(float v)456 float __attribute__((overloadable)) exp2(float v) {
457     return SC_exp2f(v);
458 }
459 FN_FUNC_FN(exp2)
460 
461 extern float __attribute__((overloadable)) pow(float, float);
462 
exp10(float v)463 extern float __attribute__((overloadable)) exp10(float v) {
464     return exp2(v * 3.321928095f);
465 }
466 FN_FUNC_FN(exp10)
467 
468 extern float __attribute__((overloadable)) SC_expm1f(float);
expm1(float v)469 float __attribute__((overloadable)) expm1(float v) {
470     return SC_expm1f(v);
471 }
FN_FUNC_FN(expm1)472 FN_FUNC_FN(expm1)
473 
474 extern float __attribute__((overloadable)) fabs(float v) {
475     int i = *((int*)(void*)&v) & 0x7fffffff;
476     return  *((float*)(void*)&i);
477 }
478 FN_FUNC_FN(fabs)
479 
480 extern float __attribute__((overloadable)) SC_fdimf(float, float);
fdim(float v1,float v2)481 float __attribute__((overloadable)) fdim(float v1, float v2) {
482     return SC_fdimf(v1, v2);
483 }
484 FN_FUNC_FN_FN(fdim)
485 
486 extern float __attribute__((overloadable)) SC_floorf(float);
floor(float v)487 float __attribute__((overloadable)) floor(float v) {
488     return SC_floorf(v);
489 }
490 FN_FUNC_FN(floor)
491 
492 extern float __attribute__((overloadable)) SC_fmaf(float, float, float);
fma(float v1,float v2,float v3)493 float __attribute__((overloadable)) fma(float v1, float v2, float v3) {
494     return SC_fmaf(v1, v2, v3);
495 }
496 FN_FUNC_FN_FN_FN(fma)
497 
498 extern float __attribute__((overloadable)) SC_fminf(float, float);
499 
500 extern float __attribute__((overloadable)) SC_fmodf(float, float);
fmod(float v1,float v2)501 float __attribute__((overloadable)) fmod(float v1, float v2) {
502     return SC_fmodf(v1, v2);
503 }
FN_FUNC_FN_FN(fmod)504 FN_FUNC_FN_FN(fmod)
505 
506 extern float __attribute__((overloadable)) fract(float v, float *iptr) {
507     int i = (int)floor(v);
508     if (iptr) {
509         iptr[0] = i;
510     }
511     return fmin(v - i, 0x1.fffffep-1f);
512 }
FN_FUNC_FN_PFN(fract)513 FN_FUNC_FN_PFN(fract)
514 
515 extern float __attribute__((const, overloadable)) fract(float v) {
516     float unused;
517     return fract(v, &unused);
518 }
519 FN_FUNC_FN(fract)
520 
521 extern float __attribute__((overloadable)) SC_frexpf(float, int *);
frexp(float v1,int * v2)522 float __attribute__((overloadable)) frexp(float v1, int* v2) {
523     return SC_frexpf(v1, v2);
524 }
525 FN_FUNC_FN_PIN(frexp)
526 
527 extern float __attribute__((overloadable)) SC_hypotf(float, float);
hypot(float v1,float v2)528 float __attribute__((overloadable)) hypot(float v1, float v2) {
529     return SC_hypotf(v1, v2);
530 }
531 FN_FUNC_FN_FN(hypot)
532 
533 extern int __attribute__((overloadable)) SC_ilogbf(float);
ilogb(float v)534 int __attribute__((overloadable)) ilogb(float v) {
535     return SC_ilogbf(v);
536 }
537 IN_FUNC_FN(ilogb)
538 
539 extern float __attribute__((overloadable)) SC_ldexpf(float, int);
ldexp(float v1,int v2)540 float __attribute__((overloadable)) ldexp(float v1, int v2) {
541     return SC_ldexpf(v1, v2);
542 }
543 FN_FUNC_FN_IN(ldexp)
544 FN_FUNC_FN_I(ldexp)
545 
546 extern float __attribute__((overloadable)) SC_lgammaf(float);
lgamma(float v)547 float __attribute__((overloadable)) lgamma(float v) {
548     return SC_lgammaf(v);
549 }
550 FN_FUNC_FN(lgamma)
551 extern float __attribute__((overloadable)) SC_lgammaf_r(float, int*);
lgamma(float v,int * ptr)552 float __attribute__((overloadable)) lgamma(float v, int* ptr) {
553     return SC_lgammaf_r(v, ptr);
554 }
555 FN_FUNC_FN_PIN(lgamma)
556 
557 extern float __attribute__((overloadable)) SC_logf(float);
log(float v)558 float __attribute__((overloadable)) log(float v) {
559     return SC_logf(v);
560 }
561 FN_FUNC_FN(log)
562 
563 extern float __attribute__((overloadable)) SC_log10f(float);
log10(float v)564 float __attribute__((overloadable)) log10(float v) {
565     return SC_log10f(v);
566 }
FN_FUNC_FN(log10)567 FN_FUNC_FN(log10)
568 
569 
570 extern float __attribute__((overloadable)) log2(float v) {
571     return log10(v) * 3.321928095f;
572 }
573 FN_FUNC_FN(log2)
574 
575 extern float __attribute__((overloadable)) SC_log1pf(float);
log1p(float v)576 float __attribute__((overloadable)) log1p(float v) {
577     return SC_log1pf(v);
578 }
579 FN_FUNC_FN(log1p)
580 
581 extern float __attribute__((overloadable)) SC_logbf(float);
logb(float v)582 float __attribute__((overloadable)) logb(float v) {
583     return SC_logbf(v);
584 }
FN_FUNC_FN(logb)585 FN_FUNC_FN(logb)
586 
587 extern float __attribute__((overloadable)) mad(float a, float b, float c) {
588     return a * b + c;
589 }
mad(float2 a,float2 b,float2 c)590 extern float2 __attribute__((overloadable)) mad(float2 a, float2 b, float2 c) {
591     return a * b + c;
592 }
mad(float3 a,float3 b,float3 c)593 extern float3 __attribute__((overloadable)) mad(float3 a, float3 b, float3 c) {
594     return a * b + c;
595 }
mad(float4 a,float4 b,float4 c)596 extern float4 __attribute__((overloadable)) mad(float4 a, float4 b, float4 c) {
597     return a * b + c;
598 }
599 
// Scalar modf: forwards the value and the float output pointer to SC_modff.
extern float __attribute__((overloadable)) SC_modff(float, float *);
float __attribute__((overloadable)) modf(float v1, float *v2) {
    return SC_modff(v1, v2);
}
// float2/float3/float4 overloads; the second outputs go through a float
// vector pointer. No trailing ';' here: the macro expands to complete
// function definitions, so a semicolon would leave a stray empty
// declaration at file scope.
FN_FUNC_FN_PFN(modf)
605 
// nan(v): returns the float whose bit pattern is v | 0x7fc00000, i.e. v with
// all exponent bits and the top mantissa bit forced on. The bits are written
// through a union rather than a pointer cast so the store does not violate
// the aliasing rules.
extern float __attribute__((overloadable)) nan(uint v) {
    union { uint32_t bits; float f; } pun;
    pun.bits = v | 0x7fc00000;
    return pun.f;
}
612 
613 extern float __attribute__((overloadable)) SC_nextafterf(float, float);
nextafter(float v1,float v2)614 float __attribute__((overloadable)) nextafter(float v1, float v2) {
615     return SC_nextafterf(v1, v2);
616 }
617 FN_FUNC_FN_FN(nextafter)
618 
619 // This function must be defined here if we're compiling with debug info
620 // (libclcore_g.bc), because we need a C source to get debug information.
621 // Otherwise the implementation can be found in IR.
622 #if defined(RS_G_RUNTIME)
623 extern float __attribute__((overloadable)) SC_powf(float, float);
pow(float v1,float v2)624 float __attribute__((overloadable)) pow(float v1, float v2) {
625     return SC_powf(v1, v2);
626 }
627 #endif // defined(RS_G_RUNTIME)
FN_FUNC_FN_FN(pow)628 FN_FUNC_FN_FN(pow)
629 
630 extern float __attribute__((overloadable)) pown(float v, int p) {
631     /* The mantissa of a float has fewer bits than an int (24 effective vs. 31).
632      * For very large ints, we'll lose whether the exponent is even or odd, making
633      * the selection of a correct sign incorrect.  We correct this.  Use copysign
634      * to handle the negative zero case.
635      */
636     float sign = (p & 0x1) ? copysign(1.f, v) : 1.f;
637     float f = pow(v, (float)p);
638     return copysign(f, sign);
639 }
FN_FUNC_FN_IN(pown)640 FN_FUNC_FN_IN(pown)
641 
642 extern float __attribute__((overloadable)) powr(float v, float p) {
643     return pow(v, p);
644 }
powr(float2 v,float2 p)645 extern float2 __attribute__((overloadable)) powr(float2 v, float2 p) {
646     return pow(v, p);
647 }
powr(float3 v,float3 p)648 extern float3 __attribute__((overloadable)) powr(float3 v, float3 p) {
649     return pow(v, p);
650 }
powr(float4 v,float4 p)651 extern float4 __attribute__((overloadable)) powr(float4 v, float4 p) {
652     return pow(v, p);
653 }
654 
655 extern float __attribute__((overloadable)) SC_remainderf(float, float);
remainder(float v1,float v2)656 float __attribute__((overloadable)) remainder(float v1, float v2) {
657     return SC_remainderf(v1, v2);
658 }
659 FN_FUNC_FN_FN(remainder)
660 
661 extern float __attribute__((overloadable)) SC_remquof(float, float, int *);
remquo(float v1,float v2,int * v3)662 float __attribute__((overloadable)) remquo(float v1, float v2, int *v3) {
663     return SC_remquof(v1, v2, v3);
664 }
665 FN_FUNC_FN_FN_PIN(remquo)
666 
667 extern float __attribute__((overloadable)) SC_rintf(float);
rint(float v)668 float __attribute__((overloadable)) rint(float v) {
669     return SC_rintf(v);
670 }
FN_FUNC_FN(rint)671 FN_FUNC_FN(rint)
672 
673 extern float __attribute__((overloadable)) rootn(float v, int r) {
674     if (r == 0) {
675         return posinf();
676     }
677 
678     if (iszero(v)) {
679         if (r < 0) {
680             if (r & 1) {
681                 return copysign(posinf(), v);
682             } else {
683                 return posinf();
684             }
685         } else {
686             if (r & 1) {
687                 return copysign(0.f, v);
688             } else {
689                 return 0.f;
690             }
691         }
692     }
693 
694     if (!isinf(v) && !isnan(v) && (v < 0.f)) {
695         if (r & 1) {
696             return (-1.f * pow(-1.f * v, 1.f / r));
697         } else {
698             return nan(0);
699         }
700     }
701 
702     return pow(v, 1.f / r);
703 }
704 FN_FUNC_FN_IN(rootn);
705 
706 extern float __attribute__((overloadable)) SC_roundf(float);
round(float v)707 float __attribute__((overloadable)) round(float v) {
708     return SC_roundf(v);
709 }
710 FN_FUNC_FN(round)
711 
/* Two-argument random backend; only declared in this file. */
extern float __attribute__((overloadable)) SC_randf2(float, float);

/* rsRand(min, max): forwards both bounds, in order, to SC_randf2. */
float __attribute__((overloadable)) rsRand(float min, float max) {
    const float sample = SC_randf2(min, max);
    return sample;
}
716 
717 
/* rsqrt(v): reciprocal square root, evaluated as 1 / sqrt(v). */
extern float __attribute__((overloadable)) rsqrt(float v) {
    const float root = sqrt(v);
    return 1.f / root;
}
721 
#if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
// These functions must be defined here if we are not using the SSE
// implementation, which includes when we are built as part of the
// debug runtime (libclcore_debug.bc) or compiling with debug info.
#if defined(RS_G_RUNTIME)
// Scalar sqrt is defined in this file only for RS_G_RUNTIME builds, where it
// forwards to SC_sqrtf.
// NOTE(review): in the other builds that reach this branch the scalar sqrt
// used by FN_FUNC_FN below must come from elsewhere -- confirm where.
extern float __attribute__((overloadable)) SC_sqrtf(float);
float __attribute__((overloadable)) sqrt(float v) {
    return SC_sqrtf(v);
}
#endif // defined(RS_G_RUNTIME)

// Component-wise vector overloads built on the scalar sqrt.
FN_FUNC_FN(sqrt)
#else
// SSE build: the vector overloads are only declared here, not defined.
extern float2 __attribute__((overloadable)) sqrt(float2);
extern float3 __attribute__((overloadable)) sqrt(float3);
extern float4 __attribute__((overloadable)) sqrt(float4);
#endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
739 
// Vector overloads of rsqrt: FN_FUNC_FN applies the scalar rsqrt to each
// component of the input vector.
FN_FUNC_FN(rsqrt)
741 
742 extern float __attribute__((overloadable)) SC_sinf(float);
sin(float v)743 float __attribute__((overloadable)) sin(float v) {
744     return SC_sinf(v);
745 }
FN_FUNC_FN(sin)746 FN_FUNC_FN(sin)
747 
748 extern float __attribute__((overloadable)) sincos(float v, float *cosptr) {
749     *cosptr = cos(v);
750     return sin(v);
751 }
sincos(float2 v,float2 * cosptr)752 extern float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) {
753     *cosptr = cos(v);
754     return sin(v);
755 }
sincos(float3 v,float3 * cosptr)756 extern float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) {
757     *cosptr = cos(v);
758     return sin(v);
759 }
sincos(float4 v,float4 * cosptr)760 extern float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) {
761     *cosptr = cos(v);
762     return sin(v);
763 }
764 
765 extern float __attribute__((overloadable)) SC_sinhf(float);
sinh(float v)766 float __attribute__((overloadable)) sinh(float v) {
767     return SC_sinhf(v);
768 }
FN_FUNC_FN(sinh)769 FN_FUNC_FN(sinh)
770 
771 extern float __attribute__((overloadable)) sinpi(float v) {
772     return sin(v * M_PI);
773 }
774 FN_FUNC_FN(sinpi)
775 
776 extern float __attribute__((overloadable)) SC_tanf(float);
tan(float v)777 float __attribute__((overloadable)) tan(float v) {
778     return SC_tanf(v);
779 }
780 FN_FUNC_FN(tan)
781 
782 extern float __attribute__((overloadable)) SC_tanhf(float);
tanh(float v)783 float __attribute__((overloadable)) tanh(float v) {
784     return SC_tanhf(v);
785 }
FN_FUNC_FN(tanh)786 FN_FUNC_FN(tanh)
787 
788 extern float __attribute__((overloadable)) tanpi(float v) {
789     return tan(v * M_PI);
790 }
791 FN_FUNC_FN(tanpi)
792 
793 
794 extern float __attribute__((overloadable)) SC_tgammaf(float);
tgamma(float v)795 float __attribute__((overloadable)) tgamma(float v) {
796     return SC_tgammaf(v);
797 }
798 FN_FUNC_FN(tgamma)
799 
800 extern float __attribute__((overloadable)) SC_truncf(float);
trunc(float v)801 float __attribute__((overloadable)) trunc(float v) {
802     return SC_truncf(v);
803 }
FN_FUNC_FN(trunc)804 FN_FUNC_FN(trunc)
805 
806 // Int ops (partial), 6.11.3
807 
/*
 * XN_FUNC_YN(typeout, fnc, typein)
 * Declares the scalar prototype `typeout fnc(typein)` and defines the 2-, 3-
 * and 4-component vector overloads of fnc.  Each vector overload calls the
 * scalar fnc on every component (x, y, z, w) and collects the results in a
 * vector of the output type.  The vector type names are formed by pasting
 * 2/3/4 onto typeout and typein, so both must be bare scalar type names.
 */
#define XN_FUNC_YN(typeout, fnc, typein)                                \
extern typeout __attribute__((overloadable)) fnc(typein);               \
extern typeout##2 __attribute__((overloadable)) fnc(typein##2 v) {  \
    typeout##2 r;                                                       \
    r.x = fnc(v.x);                                                     \
    r.y = fnc(v.y);                                                     \
    return r;                                                           \
}                                                                       \
extern typeout##3 __attribute__((overloadable)) fnc(typein##3 v) {  \
    typeout##3 r;                                                       \
    r.x = fnc(v.x);                                                     \
    r.y = fnc(v.y);                                                     \
    r.z = fnc(v.z);                                                     \
    return r;                                                           \
}                                                                       \
extern typeout##4 __attribute__((overloadable)) fnc(typein##4 v) {  \
    typeout##4 r;                                                       \
    r.x = fnc(v.x);                                                     \
    r.y = fnc(v.y);                                                     \
    r.z = fnc(v.z);                                                     \
    r.w = fnc(v.w);                                                     \
    return r;                                                           \
}
831 
832 
/*
 * UIN_FUNC_IN(fnc): instantiates XN_FUNC_YN for functions that take a signed
 * integer type and return the unsigned type of the same width
 * (char -> uchar, short -> ushort, int -> uint).
 */
#define UIN_FUNC_IN(fnc)          \
XN_FUNC_YN(uchar, fnc, char)      \
XN_FUNC_YN(ushort, fnc, short)    \
XN_FUNC_YN(uint, fnc, int)
837 
/*
 * IN_FUNC_IN(fnc): instantiates XN_FUNC_YN for functions whose return type
 * equals their argument type, across the six 8/16/32-bit integer types
 * (signed and unsigned).
 */
#define IN_FUNC_IN(fnc)           \
XN_FUNC_YN(uchar, fnc, uchar)     \
XN_FUNC_YN(char, fnc, char)       \
XN_FUNC_YN(ushort, fnc, ushort)   \
XN_FUNC_YN(short, fnc, short)     \
XN_FUNC_YN(uint, fnc, uint)       \
XN_FUNC_YN(int, fnc, int)
845 
846 
/*
 * XN_FUNC_XN_XN_BODY(type, fnc, body)
 * Defines a two-argument function fnc for `type` and for its 2-, 3- and
 * 4-component vector forms.  The scalar overload returns the expression
 * `body`, which is expected to refer to the parameters by the names v1 and
 * v2.  Each vector overload applies the scalar overload component by
 * component.
 */
#define XN_FUNC_XN_XN_BODY(type, fnc, body)         \
extern type __attribute__((overloadable))       \
        fnc(type v1, type v2) {                     \
    return body;                                    \
}                                                   \
extern type##2 __attribute__((overloadable))    \
        fnc(type##2 v1, type##2 v2) {               \
    type##2 r;                                      \
    r.x = fnc(v1.x, v2.x);                          \
    r.y = fnc(v1.y, v2.y);                          \
    return r;                                       \
}                                                   \
extern type##3 __attribute__((overloadable))    \
        fnc(type##3 v1, type##3 v2) {               \
    type##3 r;                                      \
    r.x = fnc(v1.x, v2.x);                          \
    r.y = fnc(v1.y, v2.y);                          \
    r.z = fnc(v1.z, v2.z);                          \
    return r;                                       \
}                                                   \
extern type##4 __attribute__((overloadable))    \
        fnc(type##4 v1, type##4 v2) {               \
    type##4 r;                                      \
    r.x = fnc(v1.x, v2.x);                          \
    r.y = fnc(v1.y, v2.y);                          \
    r.z = fnc(v1.z, v2.z);                          \
    r.w = fnc(v1.w, v2.w);                          \
    return r;                                       \
}
876 
/*
 * IN_FUNC_IN_IN_BODY(fnc, body): instantiates XN_FUNC_XN_XN_BODY with the
 * same body for the six 8/16/32-bit integer types and, despite the "IN"
 * in the name, for float as well.
 */
#define IN_FUNC_IN_IN_BODY(fnc, body) \
XN_FUNC_XN_XN_BODY(uchar, fnc, body)  \
XN_FUNC_XN_XN_BODY(char, fnc, body)   \
XN_FUNC_XN_XN_BODY(ushort, fnc, body) \
XN_FUNC_XN_XN_BODY(short, fnc, body)  \
XN_FUNC_XN_XN_BODY(uint, fnc, body)   \
XN_FUNC_XN_XN_BODY(int, fnc, body)    \
XN_FUNC_XN_XN_BODY(float, fnc, body)
885 
886 
887 /**
888  * abs
889  */
890 extern uint32_t __attribute__((overloadable)) abs(int32_t v) {
891     if (v < 0)
892         return -v;
893     return v;
894 }
/*
 * abs for 16-bit input.  v is promoted to int before negation, so -32768
 * negates without overflow and converts to 32768 in the unsigned result.
 */
extern uint16_t __attribute__((overloadable)) abs(int16_t v) {
    return (v < 0) ? -v : v;
}
/*
 * abs for 8-bit input.  v is promoted to int before negation, so -128
 * negates without overflow and converts to 128 in the unsigned result.
 */
extern uint8_t __attribute__((overloadable)) abs(int8_t v) {
    return (v < 0) ? -v : v;
}
905 
/**
 * clz
 * __builtin_clz only accepts a 32-bit unsigned int, so every input will be
 * expanded to 32 bits. For our smaller data types, we need to subtract off
 * these unused top bits (that will always be composed of zeros).
 *
 * __builtin_clz has an undefined result for an argument of 0, so zero is
 * handled explicitly and reports the full bit width of the type.
 */
extern uint32_t __attribute__((overloadable)) clz(uint32_t v) {
    if (v == 0) {
        return 32;
    }
    return __builtin_clz(v);
}
/*
 * clz for 16-bit unsigned input: count over the widened 32-bit value minus
 * the 16 padding bits.  Zero is special-cased (returns 16) because
 * __builtin_clz(0) is undefined.
 */
extern uint16_t __attribute__((overloadable)) clz(uint16_t v) {
    if (v == 0) {
        return 16;
    }
    return __builtin_clz(v) - 16;
}
/*
 * clz for 8-bit unsigned input: count over the widened 32-bit value minus
 * the 24 padding bits.  Zero is special-cased (returns 8) because
 * __builtin_clz(0) is undefined.
 */
extern uint8_t __attribute__((overloadable)) clz(uint8_t v) {
    if (v == 0) {
        return 8;
    }
    return __builtin_clz(v) - 24;
}
/*
 * clz for 32-bit signed input, counted on the value's bit pattern (any
 * negative value therefore yields 0).  Zero is special-cased (returns 32)
 * because __builtin_clz(0) is undefined.
 */
extern int32_t __attribute__((overloadable)) clz(int32_t v) {
    if (v == 0) {
        return 32;
    }
    return __builtin_clz((uint32_t)v);
}
/*
 * clz for 16-bit signed input.  The value is masked to its low 16 bits so
 * sign extension does not leak into the count, then the 16 padding bits
 * are subtracted.  Zero is special-cased (returns 16) because
 * __builtin_clz(0) is undefined.
 */
extern int16_t __attribute__((overloadable)) clz(int16_t v) {
    const uint32_t bits = ((uint32_t)v) & 0x0000ffff;
    if (bits == 0) {
        return 16;
    }
    return __builtin_clz(bits) - 16;
}
/*
 * clz for 8-bit signed input.  The value is masked to its low 8 bits so
 * sign extension does not leak into the count, then the 24 padding bits
 * are subtracted.  Zero is special-cased (returns 8) because
 * __builtin_clz(0) is undefined.
 */
extern int8_t __attribute__((overloadable)) clz(int8_t v) {
    const uint32_t bits = ((uint32_t)v) & 0x000000ff;
    if (bits == 0) {
        return 8;
    }
    return __builtin_clz(bits) - 24;
}
930 
931 
// Vector overloads: abs maps signed char/short/int vectors to the unsigned
// vector of the same width; clz keeps the element type for all six integer
// types.  Both apply the scalar function per component.
UIN_FUNC_IN(abs)
IN_FUNC_IN(clz)
934 
935 
936 // 6.11.4
937 
938 
939 extern float __attribute__((overloadable)) degrees(float radians) {
940     return radians * (180.f / M_PI);
941 }
degrees(float2 radians)942 extern float2 __attribute__((overloadable)) degrees(float2 radians) {
943     return radians * (180.f / M_PI);
944 }
degrees(float3 radians)945 extern float3 __attribute__((overloadable)) degrees(float3 radians) {
946     return radians * (180.f / M_PI);
947 }
degrees(float4 radians)948 extern float4 __attribute__((overloadable)) degrees(float4 radians) {
949     return radians * (180.f / M_PI);
950 }
951 
mix(float start,float stop,float amount)952 extern float __attribute__((overloadable)) mix(float start, float stop, float amount) {
953     return start + (stop - start) * amount;
954 }
mix(float2 start,float2 stop,float2 amount)955 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) {
956     return start + (stop - start) * amount;
957 }
mix(float3 start,float3 stop,float3 amount)958 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) {
959     return start + (stop - start) * amount;
960 }
mix(float4 start,float4 stop,float4 amount)961 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) {
962     return start + (stop - start) * amount;
963 }
mix(float2 start,float2 stop,float amount)964 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) {
965     return start + (stop - start) * amount;
966 }
mix(float3 start,float3 stop,float amount)967 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) {
968     return start + (stop - start) * amount;
969 }
mix(float4 start,float4 stop,float amount)970 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) {
971     return start + (stop - start) * amount;
972 }
973 
radians(float degrees)974 extern float __attribute__((overloadable)) radians(float degrees) {
975     return degrees * (M_PI / 180.f);
976 }
radians(float2 degrees)977 extern float2 __attribute__((overloadable)) radians(float2 degrees) {
978     return degrees * (M_PI / 180.f);
979 }
radians(float3 degrees)980 extern float3 __attribute__((overloadable)) radians(float3 degrees) {
981     return degrees * (M_PI / 180.f);
982 }
radians(float4 degrees)983 extern float4 __attribute__((overloadable)) radians(float4 degrees) {
984     return degrees * (M_PI / 180.f);
985 }
986 
/* step(edge, v): 0 when v is strictly below edge, otherwise 1 (this
 * includes v == edge and any comparison involving NaN). */
extern float __attribute__((overloadable)) step(float edge, float v) {
    if (v < edge) {
        return 0.f;
    }
    return 1.f;
}
step(float2 edge,float2 v)990 extern float2 __attribute__((overloadable)) step(float2 edge, float2 v) {
991     float2 r;
992     r.x = (v.x < edge.x) ? 0.f : 1.f;
993     r.y = (v.y < edge.y) ? 0.f : 1.f;
994     return r;
995 }
step(float3 edge,float3 v)996 extern float3 __attribute__((overloadable)) step(float3 edge, float3 v) {
997     float3 r;
998     r.x = (v.x < edge.x) ? 0.f : 1.f;
999     r.y = (v.y < edge.y) ? 0.f : 1.f;
1000     r.z = (v.z < edge.z) ? 0.f : 1.f;
1001     return r;
1002 }
step(float4 edge,float4 v)1003 extern float4 __attribute__((overloadable)) step(float4 edge, float4 v) {
1004     float4 r;
1005     r.x = (v.x < edge.x) ? 0.f : 1.f;
1006     r.y = (v.y < edge.y) ? 0.f : 1.f;
1007     r.z = (v.z < edge.z) ? 0.f : 1.f;
1008     r.w = (v.w < edge.w) ? 0.f : 1.f;
1009     return r;
1010 }
step(float2 edge,float v)1011 extern float2 __attribute__((overloadable)) step(float2 edge, float v) {
1012     float2 r;
1013     r.x = (v < edge.x) ? 0.f : 1.f;
1014     r.y = (v < edge.y) ? 0.f : 1.f;
1015     return r;
1016 }
step(float3 edge,float v)1017 extern float3 __attribute__((overloadable)) step(float3 edge, float v) {
1018     float3 r;
1019     r.x = (v < edge.x) ? 0.f : 1.f;
1020     r.y = (v < edge.y) ? 0.f : 1.f;
1021     r.z = (v < edge.z) ? 0.f : 1.f;
1022     return r;
1023 }
step(float4 edge,float v)1024 extern float4 __attribute__((overloadable)) step(float4 edge, float v) {
1025     float4 r;
1026     r.x = (v < edge.x) ? 0.f : 1.f;
1027     r.y = (v < edge.y) ? 0.f : 1.f;
1028     r.z = (v < edge.z) ? 0.f : 1.f;
1029     r.w = (v < edge.w) ? 0.f : 1.f;
1030     return r;
1031 }
step(float edge,float2 v)1032 extern float2 __attribute__((overloadable)) step(float edge, float2 v) {
1033     float2 r;
1034     r.x = (v.x < edge) ? 0.f : 1.f;
1035     r.y = (v.y < edge) ? 0.f : 1.f;
1036     return r;
1037 }
step(float edge,float3 v)1038 extern float3 __attribute__((overloadable)) step(float edge, float3 v) {
1039     float3 r;
1040     r.x = (v.x < edge) ? 0.f : 1.f;
1041     r.y = (v.y < edge) ? 0.f : 1.f;
1042     r.z = (v.z < edge) ? 0.f : 1.f;
1043     return r;
1044 }
step(float edge,float4 v)1045 extern float4 __attribute__((overloadable)) step(float edge, float4 v) {
1046     float4 r;
1047     r.x = (v.x < edge) ? 0.f : 1.f;
1048     r.y = (v.y < edge) ? 0.f : 1.f;
1049     r.z = (v.z < edge) ? 0.f : 1.f;
1050     r.w = (v.w < edge) ? 0.f : 1.f;
1051     return r;
1052 }
1053 
/* sign(v): 1 for positive v, -1 for negative v; any other input (zero of
 * either sign, NaN) is returned unchanged. */
extern float __attribute__((overloadable)) sign(float v) {
    return (v > 0) ? 1.f : ((v < 0) ? -1.f : v);
}
// Vector overloads of sign: the scalar sign applied to each component.
FN_FUNC_FN(sign)
1060 
1061 
1062 // 6.11.5
1063 extern float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) {
1064     float3 r;
1065     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1066     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1067     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1068     return r;
1069 }
1070 
cross(float4 lhs,float4 rhs)1071 extern float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) {
1072     float4 r;
1073     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1074     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1075     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1076     r.w = 0.f;
1077     return r;
1078 }
1079 
1080 #if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
1081 // These functions must be defined here if we are not using the SSE
1082 // implementation, which includes when we are built as part of the
1083 // debug runtime (libclcore_debug.bc) or compiling with debug info.
1084 
dot(float lhs,float rhs)1085 extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
1086     return lhs * rhs;
1087 }
dot(float2 lhs,float2 rhs)1088 extern float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) {
1089     return lhs.x*rhs.x + lhs.y*rhs.y;
1090 }
dot(float3 lhs,float3 rhs)1091 extern float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) {
1092     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
1093 }
dot(float4 lhs,float4 rhs)1094 extern float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) {
1095     return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w;
1096 }
1097 
length(float v)1098 extern float __attribute__((overloadable)) length(float v) {
1099     return fabs(v);
1100 }
length(float2 v)1101 extern float __attribute__((overloadable)) length(float2 v) {
1102     return sqrt(v.x*v.x + v.y*v.y);
1103 }
length(float3 v)1104 extern float __attribute__((overloadable)) length(float3 v) {
1105     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
1106 }
length(float4 v)1107 extern float __attribute__((overloadable)) length(float4 v) {
1108     return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
1109 }
1110 
1111 #else
1112 
// SSE build: length is only declared here so the functions below can call
// it; per the comment on the #if branch, the definitions come from the SSE
// implementation.
extern float __attribute__((overloadable)) length(float v);
extern float __attribute__((overloadable)) length(float2 v);
extern float __attribute__((overloadable)) length(float3 v);
extern float __attribute__((overloadable)) length(float4 v);
1117 
1118 #endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
1119 
distance(float lhs,float rhs)1120 extern float __attribute__((overloadable)) distance(float lhs, float rhs) {
1121     return length(lhs - rhs);
1122 }
distance(float2 lhs,float2 rhs)1123 extern float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) {
1124     return length(lhs - rhs);
1125 }
distance(float3 lhs,float3 rhs)1126 extern float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) {
1127     return length(lhs - rhs);
1128 }
distance(float4 lhs,float4 rhs)1129 extern float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) {
1130     return length(lhs - rhs);
1131 }
1132 
/* For the normalization functions, vectors of length 0 should simply be
 * returned (i.e. all the components of that vector are 0).
 *
 * Scalar case: returns -1 for negative input, 0 for zero and 1 for positive
 * input.  NaN is propagated unchanged; without the explicit check it would
 * fail both comparisons and be reported as 1, unlike the vector overloads,
 * which yield NaN components for a NaN length.
 */
extern float __attribute__((overloadable)) normalize(float v) {
    if (v != v) {
        /* NaN in, NaN out. */
        return v;
    }
    if (v == 0.0f) {
        return 0.0f;
    } else if (v < 0.0f) {
        return -1.0f;
    } else {
        return 1.0f;
    }
}
normalize(float2 v)1145 extern float2 __attribute__((overloadable)) normalize(float2 v) {
1146     float l = length(v);
1147     return l == 0.0f ? v : v / l;
1148 }
normalize(float3 v)1149 extern float3 __attribute__((overloadable)) normalize(float3 v) {
1150     float l = length(v);
1151     return l == 0.0f ? v : v / l;
1152 }
normalize(float4 v)1153 extern float4 __attribute__((overloadable)) normalize(float4 v) {
1154     float l = length(v);
1155     return l == 0.0f ? v : v / l;
1156 }
1157 
half_sqrt(float v)1158 extern float __attribute__((overloadable)) half_sqrt(float v) {
1159     return sqrt(v);
1160 }
FN_FUNC_FN(half_sqrt)1161 FN_FUNC_FN(half_sqrt)
1162 
/* fast_length: same formula as length, with half_sqrt taking the root in
 * the vector cases. */
extern float __attribute__((overloadable)) fast_length(float v) {
    const float magnitude = fabs(v);
    return magnitude;
}
extern float __attribute__((overloadable)) fast_length(float2 v) {
    const float squared = v.x*v.x + v.y*v.y;
    return half_sqrt(squared);
}
extern float __attribute__((overloadable)) fast_length(float3 v) {
    const float squared = v.x*v.x + v.y*v.y + v.z*v.z;
    return half_sqrt(squared);
}
extern float __attribute__((overloadable)) fast_length(float4 v) {
    const float squared = v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w;
    return half_sqrt(squared);
}
1175 
fast_distance(float lhs,float rhs)1176 extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
1177     return fast_length(lhs - rhs);
1178 }
fast_distance(float2 lhs,float2 rhs)1179 extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
1180     return fast_length(lhs - rhs);
1181 }
fast_distance(float3 lhs,float3 rhs)1182 extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
1183     return fast_length(lhs - rhs);
1184 }
fast_distance(float4 lhs,float4 rhs)1185 extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
1186     return fast_length(lhs - rhs);
1187 }
1188 
1189 extern float __attribute__((overloadable)) half_rsqrt(float);
1190 
/* For the normalization functions, vectors of length 0 should simply be
 * returned (i.e. all the components of that vector are 0).
 *
 * Scalar case: returns -1 for negative input, 0 for zero and 1 for positive
 * input.  NaN is propagated unchanged; without the explicit check it would
 * fail both comparisons and be reported as 1.
 */
extern float __attribute__((overloadable)) fast_normalize(float v) {
    if (v != v) {
        /* NaN in, NaN out. */
        return v;
    }
    if (v == 0.0f) {
        return 0.0f;
    } else if (v < 0.0f) {
        return -1.0f;
    } else {
        return 1.0f;
    }
}
1203 // If the length is 0, then rlength should be NaN.
fast_normalize(float2 v)1204 extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
1205     float rlength = half_rsqrt(v.x*v.x + v.y*v.y);
1206     return (rlength == rlength) ? v * rlength : v;
1207 }
fast_normalize(float3 v)1208 extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
1209     float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
1210     return (rlength == rlength) ? v * rlength : v;
1211 }
fast_normalize(float4 v)1212 extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
1213     float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
1214     return (rlength == rlength) ? v * rlength : v;
1215 }
1216 
/* half_recip(v): reciprocal, computed with an ordinary float division. */
extern float __attribute__((overloadable)) half_recip(float v) {
    const float reciprocal = 1.f / v;
    return reciprocal;
}
1220 
1221 /*
1222 extern float __attribute__((overloadable)) approx_atan(float x) {
1223     if (x == 0.f)
1224         return 0.f;
1225     if (x < 0.f)
1226         return -1.f * approx_atan(-1.f * x);
1227     if (x > 1.f)
1228         return M_PI_2 - approx_atan(approx_recip(x));
1229     return x * approx_recip(1.f + 0.28f * x*x);
1230 }
1231 FN_FUNC_FN(approx_atan)
1232 */
1233 
/* Overlays a float and a 32-bit integer on the same storage so the raw bit
 * pattern of a float can be read or written (used by GET_FLOAT_WORD and
 * SET_FLOAT_WORD below). */
typedef union
{
  float fv;
  int32_t iv;
} ieee_float_shape_type;
1239 
/* Get a 32 bit int from a float.
 * i receives the bit pattern of d (a reinterpretation through the union,
 * not a numeric conversion).  Both arguments are evaluated exactly once. */

#define GET_FLOAT_WORD(i,d)                 \
do {                                \
  ieee_float_shape_type gf_u;                   \
  gf_u.fv = (d);                     \
  (i) = gf_u.iv;                      \
} while (0)
1248 
/* Set a float from a 32 bit int.
 * d receives the float whose bit pattern is i (a reinterpretation through
 * the union, not a numeric conversion).  Both arguments are evaluated
 * exactly once. */

#define SET_FLOAT_WORD(d,i)                 \
do {                                \
  ieee_float_shape_type sf_u;                   \
  sf_u.iv = (i);                      \
  (d) = sf_u.fv;                     \
} while (0)
1257 
1258 
1259 
// Valid -125 to 125
//
// Approximate 2^v.  v is split into an integer part x and a remainder
// r = v - x.  2^x is built directly by writing x + 127 into the exponent
// field (bit 23 upward) of a float; 2^r is approximated by the 4th-order
// Taylor polynomial of exp evaluated at r * ln(2).  The two factors are
// multiplied.  Outside the stated range the biased exponent no longer fits
// the exponent field, so the result is not meaningful.
extern float __attribute__((overloadable)) native_exp2(float v) {
    int32_t iv = (int)v;
    // Truncation rounds toward zero; for negative iv the shift contributes -1
    // (assuming an arithmetic right shift), moving x one step further down.
    int32_t x = iv + (iv >> 31); // ~floor(v)
    float r = (v - x);

    float fo;
    SET_FLOAT_WORD(fo, (x + 127) << 23);

    r *= 0.694f; // ~ ln(2) = 0.6931..., so that exp(r) below equals 2^(v - x)
    float r2 = r*r;
    // 1 + r + r^2/2 + r^3/6 + r^4/24
    float adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
    return fo * adj;
}
1274 
// float2 form of native_exp2: the same integer/remainder split and Taylor
// polynomial as the scalar version, carried out on both lanes at once.
extern float2 __attribute__((overloadable)) native_exp2(float2 v) {
    int2 iv = convert_int2(v);
    int2 x = iv + (iv >> (int2)31);//floor(v);
    float2 r = (v - convert_float2(x));

    // Bias the exponent.
    x += 127;

    // NOTE(review): this relies on the vector cast reinterpreting the shifted
    // integer bits as floats (the counterpart of SET_FLOAT_WORD in the scalar
    // version) rather than converting the values -- confirm for this compiler.
    float2 fo = (float2)(x << (int2)23);

    r *= 0.694f; // ~ ln(2) = 0.6931..., so that exp(r) below equals 2^(v - x)
    float2 r2 = r*r;
    // 1 + r + r^2/2 + r^3/6 + r^4/24
    float2 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
    return fo * adj;
}
1289 
// float4 form of native_exp2: the same integer/remainder split and Taylor
// polynomial as the scalar version, carried out on all four lanes at once.
extern float4 __attribute__((overloadable)) native_exp2(float4 v) {
    int4 iv = convert_int4(v);
    int4 x = iv + (iv >> (int4)31);//floor(v);
    float4 r = (v - convert_float4(x));

    // Bias the exponent.
    x += 127;

    // NOTE(review): this relies on the vector cast reinterpreting the shifted
    // integer bits as floats (the counterpart of SET_FLOAT_WORD in the scalar
    // version) rather than converting the values -- confirm for this compiler.
    float4 fo = (float4)(x << (int4)23);

    r *= 0.694f; // ~ ln(2) = 0.6931..., so that exp(r) below equals 2^(v - x)
    float4 r2 = r*r;
    // 1 + r + r^2/2 + r^3/6 + r^4/24
    float4 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
    return fo * adj;
}
1304 
native_exp2(float3 v)1305 extern float3 __attribute__((overloadable)) native_exp2(float3 v) {
1306     float4 t = 1.f;
1307     t.xyz = v;
1308     return native_exp2(t).xyz;
1309 }
1310 
1311 
native_exp(float v)1312 extern float __attribute__((overloadable)) native_exp(float v) {
1313     return native_exp2(v * 1.442695041f);
1314 }
native_exp(float2 v)1315 extern float2 __attribute__((overloadable)) native_exp(float2 v) {
1316     return native_exp2(v * 1.442695041f);
1317 }
native_exp(float3 v)1318 extern float3 __attribute__((overloadable)) native_exp(float3 v) {
1319     return native_exp2(v * 1.442695041f);
1320 }
native_exp(float4 v)1321 extern float4 __attribute__((overloadable)) native_exp(float4 v) {
1322     return native_exp2(v * 1.442695041f);
1323 }
1324 
native_exp10(float v)1325 extern float __attribute__((overloadable)) native_exp10(float v) {
1326     return native_exp2(v * 3.321928095f);
1327 }
native_exp10(float2 v)1328 extern float2 __attribute__((overloadable)) native_exp10(float2 v) {
1329     return native_exp2(v * 3.321928095f);
1330 }
native_exp10(float3 v)1331 extern float3 __attribute__((overloadable)) native_exp10(float3 v) {
1332     return native_exp2(v * 3.321928095f);
1333 }
native_exp10(float4 v)1334 extern float4 __attribute__((overloadable)) native_exp10(float4 v) {
1335     return native_exp2(v * 3.321928095f);
1336 }
1337 
// Approximate log2(v) from the float's bit pattern.
// The biased exponent e is read from bits 23..30.  The mantissa is then
// re-labelled with exponent 127, giving a value ir in [1, 2), whose log2 is
// approximated by the 6th-order Taylor expansion of ln around 1.5, each term
// divided by ln(2).  The result is (e - 127) + that polynomial.
// The sign bit is masked away and no branch handles zero, denormal,
// infinite or NaN inputs.
extern float __attribute__((overloadable)) native_log2(float v) {
    int32_t ibits;
    GET_FLOAT_WORD(ibits, v);

    int32_t e = (ibits >> 23) & 0xff;

    // Keep the 23 mantissa bits and force the exponent to 127 (i.e. 2^0).
    ibits &= 0x7fffff;
    ibits |= 127 << 23;

    float ir;
    SET_FLOAT_WORD(ir, ibits);
    // Centre the expansion on 1.5, the midpoint of [1, 2).
    ir -= 1.5f;
    float ir2 = ir*ir;
    // Numerators are ln(1.5) and 1 / (k * 1.5^k) for k = 1..6;
    // 0.693147181 is ln(2).
    float adj2 = (0.405465108f / 0.693147181f) +
                 ((0.666666667f / 0.693147181f) * ir) -
                 ((0.222222222f / 0.693147181f) * ir2) +
                 ((0.098765432f / 0.693147181f) * ir*ir2) -
                 ((0.049382716f / 0.693147181f) * ir2*ir2) +
                 ((0.026337449f / 0.693147181f) * ir*ir2*ir2) -
                 ((0.014631916f / 0.693147181f) * ir2*ir2*ir2);
    return (float)(e - 127) + adj2;
}
native_log2(float2 v)1360 extern float2 __attribute__((overloadable)) native_log2(float2 v) {
1361     float2 v2 = {native_log2(v.x), native_log2(v.y)};
1362     return v2;
1363 }
native_log2(float3 v)1364 extern float3 __attribute__((overloadable)) native_log2(float3 v) {
1365     float3 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z)};
1366     return v2;
1367 }
native_log2(float4 v)1368 extern float4 __attribute__((overloadable)) native_log2(float4 v) {
1369     float4 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z), native_log2(v.w)};
1370     return v2;
1371 }
1372 
native_log(float v)1373 extern float __attribute__((overloadable)) native_log(float v) {
1374     return native_log2(v) * (1.f / 1.442695041f);
1375 }
native_log(float2 v)1376 extern float2 __attribute__((overloadable)) native_log(float2 v) {
1377     return native_log2(v) * (1.f / 1.442695041f);
1378 }
native_log(float3 v)1379 extern float3 __attribute__((overloadable)) native_log(float3 v) {
1380     return native_log2(v) * (1.f / 1.442695041f);
1381 }
native_log(float4 v)1382 extern float4 __attribute__((overloadable)) native_log(float4 v) {
1383     return native_log2(v) * (1.f / 1.442695041f);
1384 }
1385 
native_log10(float v)1386 extern float __attribute__((overloadable)) native_log10(float v) {
1387     return native_log2(v) * (1.f / 3.321928095f);
1388 }
native_log10(float2 v)1389 extern float2 __attribute__((overloadable)) native_log10(float2 v) {
1390     return native_log2(v) * (1.f / 3.321928095f);
1391 }
native_log10(float3 v)1392 extern float3 __attribute__((overloadable)) native_log10(float3 v) {
1393     return native_log2(v) * (1.f / 3.321928095f);
1394 }
native_log10(float4 v)1395 extern float4 __attribute__((overloadable)) native_log10(float4 v) {
1396     return native_log2(v) * (1.f / 3.321928095f);
1397 }
1398 
1399 
native_powr(float v,float y)1400 extern float __attribute__((overloadable)) native_powr(float v, float y) {
1401     float v2 = native_log2(v);
1402     v2 = fmax(v2 * y, -125.f);
1403     return native_exp2(v2);
1404 }
native_powr(float2 v,float2 y)1405 extern float2 __attribute__((overloadable)) native_powr(float2 v, float2 y) {
1406     float2 v2 = native_log2(v);
1407     v2 = fmax(v2 * y, -125.f);
1408     return native_exp2(v2);
1409 }
native_powr(float3 v,float3 y)1410 extern float3 __attribute__((overloadable)) native_powr(float3 v, float3 y) {
1411     float3 v2 = native_log2(v);
1412     v2 = fmax(v2 * y, -125.f);
1413     return native_exp2(v2);
1414 }
native_powr(float4 v,float4 y)1415 extern float4 __attribute__((overloadable)) native_powr(float4 v, float4 y) {
1416     float4 v2 = native_log2(v);
1417     v2 = fmax(v2 * y, -125.f);
1418     return native_exp2(v2);
1419 }
1420 
min(double v1,double v2)1421 extern double __attribute__((overloadable)) min(double v1, double v2) {
1422     return v1 < v2 ? v1 : v2;
1423 }
1424 
min(double2 v1,double2 v2)1425 extern double2 __attribute__((overloadable)) min(double2 v1, double2 v2) {
1426     double2 r;
1427     r.x = v1.x < v2.x ? v1.x : v2.x;
1428     r.y = v1.y < v2.y ? v1.y : v2.y;
1429     return r;
1430 }
1431 
min(double3 v1,double3 v2)1432 extern double3 __attribute__((overloadable)) min(double3 v1, double3 v2) {
1433     double3 r;
1434     r.x = v1.x < v2.x ? v1.x : v2.x;
1435     r.y = v1.y < v2.y ? v1.y : v2.y;
1436     r.z = v1.z < v2.z ? v1.z : v2.z;
1437     return r;
1438 }
1439 
min(double4 v1,double4 v2)1440 extern double4 __attribute__((overloadable)) min(double4 v1, double4 v2) {
1441     double4 r;
1442     r.x = v1.x < v2.x ? v1.x : v2.x;
1443     r.y = v1.y < v2.y ? v1.y : v2.y;
1444     r.z = v1.z < v2.z ? v1.z : v2.z;
1445     r.w = v1.w < v2.w ? v1.w : v2.w;
1446     return r;
1447 }
1448 
min(long v1,long v2)1449 extern long __attribute__((overloadable)) min(long v1, long v2) {
1450     return v1 < v2 ? v1 : v2;
1451 }
min(long2 v1,long2 v2)1452 extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) {
1453     long2 r;
1454     r.x = v1.x < v2.x ? v1.x : v2.x;
1455     r.y = v1.y < v2.y ? v1.y : v2.y;
1456     return r;
1457 }
min(long3 v1,long3 v2)1458 extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) {
1459     long3 r;
1460     r.x = v1.x < v2.x ? v1.x : v2.x;
1461     r.y = v1.y < v2.y ? v1.y : v2.y;
1462     r.z = v1.z < v2.z ? v1.z : v2.z;
1463     return r;
1464 }
min(long4 v1,long4 v2)1465 extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) {
1466     long4 r;
1467     r.x = v1.x < v2.x ? v1.x : v2.x;
1468     r.y = v1.y < v2.y ? v1.y : v2.y;
1469     r.z = v1.z < v2.z ? v1.z : v2.z;
1470     r.w = v1.w < v2.w ? v1.w : v2.w;
1471     return r;
1472 }
1473 
min(ulong v1,ulong v2)1474 extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) {
1475     return v1 < v2 ? v1 : v2;
1476 }
min(ulong2 v1,ulong2 v2)1477 extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) {
1478     ulong2 r;
1479     r.x = v1.x < v2.x ? v1.x : v2.x;
1480     r.y = v1.y < v2.y ? v1.y : v2.y;
1481     return r;
1482 }
min(ulong3 v1,ulong3 v2)1483 extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) {
1484     ulong3 r;
1485     r.x = v1.x < v2.x ? v1.x : v2.x;
1486     r.y = v1.y < v2.y ? v1.y : v2.y;
1487     r.z = v1.z < v2.z ? v1.z : v2.z;
1488     return r;
1489 }
min(ulong4 v1,ulong4 v2)1490 extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) {
1491     ulong4 r;
1492     r.x = v1.x < v2.x ? v1.x : v2.x;
1493     r.y = v1.y < v2.y ? v1.y : v2.y;
1494     r.z = v1.z < v2.z ? v1.z : v2.z;
1495     r.w = v1.w < v2.w ? v1.w : v2.w;
1496     return r;
1497 }
1498 
/* Maximum of two doubles.  Returns v1 only when it compares strictly greater
 * than v2; in every other case (including unordered compares) returns v2. */
extern double __attribute__((overloadable)) max(double v1, double v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
1502 
max(double2 v1,double2 v2)1503 extern double2 __attribute__((overloadable)) max(double2 v1, double2 v2) {
1504     double2 r;
1505     r.x = v1.x > v2.x ? v1.x : v2.x;
1506     r.y = v1.y > v2.y ? v1.y : v2.y;
1507     return r;
1508 }
1509 
max(double3 v1,double3 v2)1510 extern double3 __attribute__((overloadable)) max(double3 v1, double3 v2) {
1511     double3 r;
1512     r.x = v1.x > v2.x ? v1.x : v2.x;
1513     r.y = v1.y > v2.y ? v1.y : v2.y;
1514     r.z = v1.z > v2.z ? v1.z : v2.z;
1515     return r;
1516 }
1517 
max(double4 v1,double4 v2)1518 extern double4 __attribute__((overloadable)) max(double4 v1, double4 v2) {
1519     double4 r;
1520     r.x = v1.x > v2.x ? v1.x : v2.x;
1521     r.y = v1.y > v2.y ? v1.y : v2.y;
1522     r.z = v1.z > v2.z ? v1.z : v2.z;
1523     r.w = v1.w > v2.w ? v1.w : v2.w;
1524     return r;
1525 }
1526 
/* Maximum of two long values; returns v2 unless v1 is strictly larger. */
extern long __attribute__((overloadable)) max(long v1, long v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
max(long2 v1,long2 v2)1530 extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) {
1531     long2 r;
1532     r.x = v1.x > v2.x ? v1.x : v2.x;
1533     r.y = v1.y > v2.y ? v1.y : v2.y;
1534     return r;
1535 }
max(long3 v1,long3 v2)1536 extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) {
1537     long3 r;
1538     r.x = v1.x > v2.x ? v1.x : v2.x;
1539     r.y = v1.y > v2.y ? v1.y : v2.y;
1540     r.z = v1.z > v2.z ? v1.z : v2.z;
1541     return r;
1542 }
max(long4 v1,long4 v2)1543 extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) {
1544     long4 r;
1545     r.x = v1.x > v2.x ? v1.x : v2.x;
1546     r.y = v1.y > v2.y ? v1.y : v2.y;
1547     r.z = v1.z > v2.z ? v1.z : v2.z;
1548     r.w = v1.w > v2.w ? v1.w : v2.w;
1549     return r;
1550 }
1551 
max(ulong v1,ulong v2)1552 extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) {
1553     return v1 > v2 ? v1 : v2;
1554 }
max(ulong2 v1,ulong2 v2)1555 extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) {
1556     ulong2 r;
1557     r.x = v1.x > v2.x ? v1.x : v2.x;
1558     r.y = v1.y > v2.y ? v1.y : v2.y;
1559     return r;
1560 }
max(ulong3 v1,ulong3 v2)1561 extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) {
1562     ulong3 r;
1563     r.x = v1.x > v2.x ? v1.x : v2.x;
1564     r.y = v1.y > v2.y ? v1.y : v2.y;
1565     r.z = v1.z > v2.z ? v1.z : v2.z;
1566     return r;
1567 }
max(ulong4 v1,ulong4 v2)1568 extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) {
1569     ulong4 r;
1570     r.x = v1.x > v2.x ? v1.x : v2.x;
1571     r.y = v1.y > v2.y ? v1.y : v2.y;
1572     r.z = v1.z > v2.z ? v1.z : v2.z;
1573     r.w = v1.w > v2.w ? v1.w : v2.w;
1574     return r;
1575 }
1576 
/* Emit native_<fn> overloads for float, float2, float3 and float4.  Each one
 * forwards its single argument to the overload of <fn> for the same type. */
#define THUNK_NATIVE_F(fn)                                            \
    float __attribute__((overloadable)) native_##fn(float v) {        \
        return fn(v);                                                 \
    }                                                                 \
    float2 __attribute__((overloadable)) native_##fn(float2 v) {      \
        return fn(v);                                                 \
    }                                                                 \
    float3 __attribute__((overloadable)) native_##fn(float3 v) {      \
        return fn(v);                                                 \
    }                                                                 \
    float4 __attribute__((overloadable)) native_##fn(float4 v) {      \
        return fn(v);                                                 \
    }
1582 
/* Emit native_<fn> overloads taking two arguments of the same float type
 * (float, float2, float3, float4).  Each one forwards both arguments to the
 * matching overload of <fn>. */
#define THUNK_NATIVE_F_F(fn)                                                 \
    float __attribute__((overloadable)) native_##fn(float v1, float v2) {    \
        return fn(v1, v2);                                                   \
    }                                                                        \
    float2 __attribute__((overloadable)) native_##fn(float2 v1, float2 v2) { \
        return fn(v1, v2);                                                   \
    }                                                                        \
    float3 __attribute__((overloadable)) native_##fn(float3 v1, float3 v2) { \
        return fn(v1, v2);                                                   \
    }                                                                        \
    float4 __attribute__((overloadable)) native_##fn(float4 v1, float4 v2) { \
        return fn(v1, v2);                                                   \
    }
1588 
/* Emit native_<fn> overloads taking a float-family value and a pointer to
 * the same type.  Each one passes both arguments straight through to the
 * matching overload of <fn>. */
#define THUNK_NATIVE_F_FP(fn)                                                 \
    float __attribute__((overloadable)) native_##fn(float v1, float *v2) {    \
        return fn(v1, v2);                                                    \
    }                                                                         \
    float2 __attribute__((overloadable)) native_##fn(float2 v1, float2 *v2) { \
        return fn(v1, v2);                                                    \
    }                                                                         \
    float3 __attribute__((overloadable)) native_##fn(float3 v1, float3 *v2) { \
        return fn(v1, v2);                                                    \
    }                                                                         \
    float4 __attribute__((overloadable)) native_##fn(float4 v1, float4 *v2) { \
        return fn(v1, v2);                                                    \
    }
1594 
/* Emit native_<fn> overloads taking a float-family value and an int vector
 * of the same width.  Each one forwards both arguments to the matching
 * overload of <fn>. */
#define THUNK_NATIVE_F_I(fn)                                                \
    float __attribute__((overloadable)) native_##fn(float v1, int v2) {     \
        return fn(v1, v2);                                                  \
    }                                                                       \
    float2 __attribute__((overloadable)) native_##fn(float2 v1, int2 v2) {  \
        return fn(v1, v2);                                                  \
    }                                                                       \
    float3 __attribute__((overloadable)) native_##fn(float3 v1, int3 v2) {  \
        return fn(v1, v2);                                                  \
    }                                                                       \
    float4 __attribute__((overloadable)) native_##fn(float4 v1, int4 v2) {  \
        return fn(v1, v2);                                                  \
    }
1600 
1601 THUNK_NATIVE_F(acos)
THUNK_NATIVE_F(acosh)1602 THUNK_NATIVE_F(acosh)
1603 THUNK_NATIVE_F(acospi)
1604 THUNK_NATIVE_F(asin)
1605 THUNK_NATIVE_F(asinh)
1606 THUNK_NATIVE_F(asinpi)
1607 THUNK_NATIVE_F(atan)
1608 THUNK_NATIVE_F_F(atan2)
1609 THUNK_NATIVE_F(atanh)
1610 THUNK_NATIVE_F(atanpi)
1611 THUNK_NATIVE_F_F(atan2pi)
1612 THUNK_NATIVE_F(cbrt)
1613 THUNK_NATIVE_F(cos)
1614 THUNK_NATIVE_F(cosh)
1615 THUNK_NATIVE_F(cospi)
1616 THUNK_NATIVE_F(expm1)
1617 THUNK_NATIVE_F_F(hypot)
1618 THUNK_NATIVE_F(log1p)
1619 THUNK_NATIVE_F_I(rootn)
1620 THUNK_NATIVE_F(rsqrt)
1621 THUNK_NATIVE_F(sqrt)
1622 THUNK_NATIVE_F(sin)
1623 THUNK_NATIVE_F_FP(sincos)
1624 THUNK_NATIVE_F(sinh)
1625 THUNK_NATIVE_F(sinpi)
1626 THUNK_NATIVE_F(tan)
1627 THUNK_NATIVE_F(tanh)
1628 THUNK_NATIVE_F(tanpi)
1629 
1630 #undef THUNK_NATIVE_F
1631 #undef THUNK_NATIVE_F_F
1632 #undef THUNK_NATIVE_F_I
1633 #undef THUNK_NATIVE_F_FP
1634 
/* native_normalize for float: forwards to fast_normalize. */
float __attribute__((overloadable)) native_normalize(float v) {
    return fast_normalize(v);
}
native_normalize(float2 v)1636 float2 __attribute__((overloadable)) native_normalize(float2 v) { return fast_normalize(v);}
native_normalize(float3 v)1637 float3 __attribute__((overloadable)) native_normalize(float3 v) { return fast_normalize(v);}
native_normalize(float4 v)1638 float4 __attribute__((overloadable)) native_normalize(float4 v) { return fast_normalize(v);}
1639 
/* native_distance for float: forwards to fast_distance. */
float __attribute__((overloadable)) native_distance(float v1, float v2) {
    return fast_distance(v1, v2);
}
/* native_distance for float2: forwards to fast_distance; result is a float. */
float __attribute__((overloadable)) native_distance(float2 v1, float2 v2) {
    return fast_distance(v1, v2);
}
/* native_distance for float3: forwards to fast_distance; result is a float. */
float __attribute__((overloadable)) native_distance(float3 v1, float3 v2) {
    return fast_distance(v1, v2);
}
/* native_distance for float4: forwards to fast_distance; result is a float. */
float __attribute__((overloadable)) native_distance(float4 v1, float4 v2) {
    return fast_distance(v1, v2);
}
1644 
/* native_length for float: forwards to fast_length. */
float __attribute__((overloadable)) native_length(float v) {
    return fast_length(v);
}
/* native_length for float2: forwards to fast_length; result is a float. */
float __attribute__((overloadable)) native_length(float2 v) {
    return fast_length(v);
}
/* native_length for float3: forwards to fast_length; result is a float. */
float __attribute__((overloadable)) native_length(float3 v) {
    return fast_length(v);
}
/* native_length for float4: forwards to fast_length; result is a float. */
float __attribute__((overloadable)) native_length(float4 v) {
    return fast_length(v);
}
1649 
/* native_divide for float: plain division v1 / v2. */
float __attribute__((overloadable)) native_divide(float v1, float v2) {
    return v1 / v2;
}
native_divide(float2 v1,float2 v2)1651 float2 __attribute__((overloadable)) native_divide(float2 v1, float2 v2) { return v1 / v2;}
native_divide(float3 v1,float3 v2)1652 float3 __attribute__((overloadable)) native_divide(float3 v1, float3 v2) { return v1 / v2;}
native_divide(float4 v1,float4 v2)1653 float4 __attribute__((overloadable)) native_divide(float4 v1, float4 v2) { return v1 / v2;}
1654 
/* native_recip for float: returns 1 / v. */
float __attribute__((overloadable)) native_recip(float v) {
    return 1.f / v;
}
native_recip(float2 v)1656 float2 __attribute__((overloadable)) native_recip(float2 v) { return ((float2)1.f) / v;}
native_recip(float3 v)1657 float3 __attribute__((overloadable)) native_recip(float3 v) { return ((float3)1.f) / v;}
native_recip(float4 v)1658 float4 __attribute__((overloadable)) native_recip(float4 v) { return ((float4)1.f) / v;}
1659 
1660 
1661 
1662 
1663 
/* Retire the overload-generator macros (presumably all defined earlier in
 * this file) so they do not leak into the half-precision section below. */

/* Float-vector generators. */
#undef FN_FUNC_FN
#undef FN_FUNC_FN_F
#undef FN_FUNC_FN_FN
#undef FN_FUNC_FN_FN_FN
#undef FN_FUNC_FN_FN_PIN
#undef FN_FUNC_FN_I
#undef FN_FUNC_FN_IN
#undef FN_FUNC_FN_PFN
#undef FN_FUNC_FN_PIN

/* Integer and generic-type generators. */
#undef IN_FUNC_FN
#undef IN_FUNC_IN
#undef IN_FUNC_IN_IN_BODY
#undef UIN_FUNC_IN
#undef XN_FUNC_XN_XN_BODY
#undef XN_FUNC_YN
1679 
/* 16-bit word used as the half-precision +infinity pattern; nan_half() ORs a
 * mantissa bit into it to build a NaN. */
static unsigned short const kHalfPositiveInfinity = 0x7C00;
1681 
/* Generate half-precision overloads of the form
 *     HN output = fn(HN input)
 * for HN in {half, half2, half3, half4}.  Each overload widens its argument
 * to the matching float type, calls fn on it, and narrows the result back to
 * half.
 */
#define HN_FUNC_HN(fn)                                                    \
extern half __attribute__((overloadable)) fn(half h) {                    \
    float wide = (float) h;                                               \
    return (half) fn(wide);                                               \
}                                                                         \
extern half2 __attribute__((overloadable)) fn(half2 v) {                  \
    float2 wide = convert_float2(v);                                      \
    return convert_half2(fn(wide));                                       \
}                                                                         \
extern half3 __attribute__((overloadable)) fn(half3 v) {                  \
    float3 wide = convert_float3(v);                                      \
    return convert_half3(fn(wide));                                       \
}                                                                         \
extern half4 __attribute__((overloadable)) fn(half4 v) {                  \
    float4 wide = convert_float4(v);                                      \
    return convert_half4(fn(wide));                                       \
}
1699 
/* Generate half-precision overloads of the form
 *     HN output = fn(HN input1, HN input2)
 * for HN in {half, half2, half3, half4}.  Both arguments are widened to the
 * matching float type, fn is called on them, and the result is narrowed back
 * to half.
 */
#define HN_FUNC_HN_HN(fn)                                                 \
extern half __attribute__((overloadable)) fn(half h1, half h2) {          \
    float f1 = (float) h1;                                                \
    float f2 = (float) h2;                                                \
    return (half) fn(f1, f2);                                             \
}                                                                         \
extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2) {       \
    float2 f1 = convert_float2(v1);                                       \
    float2 f2 = convert_float2(v2);                                       \
    return convert_half2(fn(f1, f2));                                     \
}                                                                         \
extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2) {       \
    float3 f1 = convert_float3(v1);                                       \
    float3 f2 = convert_float3(v2);                                       \
    return convert_half3(fn(f1, f2));                                     \
}                                                                         \
extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2) {       \
    float4 f1 = convert_float4(v1);                                       \
    float4 f2 = convert_float4(v2);                                       \
    return convert_half4(fn(f1, f2));                                     \
}
1720 
/* Generate half-precision overloads of the form
 *     HN output = fn(HN input1, half input2)
 * for the vector types half2, half3 and half4 only (no scalar overload is
 * emitted).  The vector is widened with convert_floatN, the scalar with a
 * cast to float, and the result is narrowed back to half.
 */
#define HN_FUNC_HN_H(fn)                                                  \
extern half2 __attribute__((overloadable)) fn(half2 v1, half v2) {        \
    float2 wide = convert_float2(v1);                                     \
    float scalar = (float) v2;                                            \
    return convert_half2(fn(wide, scalar));                               \
}                                                                         \
extern half3 __attribute__((overloadable)) fn(half3 v1, half v2) {        \
    float3 wide = convert_float3(v1);                                     \
    float scalar = (float) v2;                                            \
    return convert_half3(fn(wide, scalar));                               \
}                                                                         \
extern half4 __attribute__((overloadable)) fn(half4 v1, half v2) {        \
    float4 wide = convert_float4(v1);                                     \
    float scalar = (float) v2;                                            \
    return convert_half4(fn(wide, scalar));                               \
}
1735 
/* Generate half-precision overloads of the form
 *     HN output = fn(HN input1, HN input2, HN input3)
 * for HN in {half, half2, half3, half4}.  All three arguments are widened to
 * the matching float type, fn is called on them, and the result is narrowed
 * back to half.
 */
#define HN_FUNC_HN_HN_HN(fn)                                                   \
extern half __attribute__((overloadable)) fn(half h1, half h2, half h3) {      \
    float f1 = (float) h1;                                                     \
    float f2 = (float) h2;                                                     \
    float f3 = (float) h3;                                                     \
    return (half) fn(f1, f2, f3);                                              \
}                                                                              \
extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2, half2 v3) {  \
    float2 f1 = convert_float2(v1);                                            \
    float2 f2 = convert_float2(v2);                                            \
    float2 f3 = convert_float2(v3);                                            \
    return convert_half2(fn(f1, f2, f3));                                      \
}                                                                              \
extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2, half3 v3) {  \
    float3 f1 = convert_float3(v1);                                            \
    float3 f2 = convert_float3(v2);                                            \
    float3 f3 = convert_float3(v3);                                            \
    return convert_half3(fn(f1, f2, f3));                                      \
}                                                                              \
extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2, half4 v3) {  \
    float4 f1 = convert_float4(v1);                                            \
    float4 f2 = convert_float4(v2);                                            \
    float4 f3 = convert_float4(v3);                                            \
    return convert_half4(fn(f1, f2, f3));                                      \
}
1759 
/* Generate half-precision overloads of the form
 *     HN output = fn(HN input1, IN input2)
 * where HN is half, half2, half3 or half4 and IN is the int type of the same
 * vector length.  Only the half argument is widened to float; the int
 * argument is passed to fn unchanged.
 */
#define HN_FUNC_HN_IN(fn)                                                 \
extern half __attribute__((overloadable)) fn(half h1, int v) {            \
    float wide = (float) h1;                                              \
    return (half) fn(wide, v);                                            \
}                                                                         \
extern half2 __attribute__((overloadable)) fn(half2 v1, int2 v2) {        \
    float2 wide = convert_float2(v1);                                     \
    return convert_half2(fn(wide, v2));                                   \
}                                                                         \
extern half3 __attribute__((overloadable)) fn(half3 v1, int3 v2) {        \
    float3 wide = convert_float3(v1);                                     \
    return convert_half3(fn(wide, v2));                                   \
}                                                                         \
extern half4 __attribute__((overloadable)) fn(half4 v1, int4 v2) {        \
    float4 wide = convert_float4(v1);                                     \
    return convert_half4(fn(wide, v2));                                   \
}
1778 
/* Generate half-precision overloads of the form
 *     half output = fn(HN input1)
 * for HN in {half, half2, half3, half4}.  The argument is widened to the
 * matching float type and the value fn returns is converted to a scalar
 * half.
 */
#define H_FUNC_HN(fn)                                                     \
extern half __attribute__((overloadable)) fn(half h) {                    \
    float wide = (float) h;                                               \
    return (half) fn(wide);                                               \
}                                                                         \
extern half __attribute__((overloadable)) fn(half2 v) {                   \
    float2 wide = convert_float2(v);                                      \
    return (half) fn(wide);                                               \
}                                                                         \
extern half __attribute__((overloadable)) fn(half3 v) {                   \
    float3 wide = convert_float3(v);                                      \
    return (half) fn(wide);                                               \
}                                                                         \
extern half __attribute__((overloadable)) fn(half4 v) {                   \
    float4 wide = convert_float4(v);                                      \
    return (half) fn(wide);                                               \
}
1796 
/* Generate half-precision overloads of the form
 *     half output = fn(HN input1, HN input2)
 * for HN in {half, half2, half3, half4}.  Both arguments are widened to the
 * matching float type and the value fn returns is converted to a scalar
 * half.
 */
#define H_FUNC_HN_HN(fn)                                                  \
extern half __attribute__((overloadable)) fn(half h1, half h2) {          \
    float f1 = (float) h1;                                                \
    float f2 = (float) h2;                                                \
    return (half) fn(f1, f2);                                             \
}                                                                         \
extern half __attribute__((overloadable)) fn(half2 v1, half2 v2) {        \
    float2 f1 = convert_float2(v1);                                       \
    float2 f2 = convert_float2(v2);                                       \
    return (half) fn(f1, f2);                                             \
}                                                                         \
extern half __attribute__((overloadable)) fn(half3 v1, half3 v2) {        \
    float3 f1 = convert_float3(v1);                                       \
    float3 f2 = convert_float3(v2);                                       \
    return (half) fn(f1, f2);                                             \
}                                                                         \
extern half __attribute__((overloadable)) fn(half4 v1, half4 v2) {        \
    float4 f1 = convert_float4(v1);                                       \
    float4 f2 = convert_float4(v2);                                       \
    return (half) fn(f1, f2);                                             \
}
1814 
/* Generate vector overloads of the form
 *     HN output = fnc(HN input1, HN *output2)
 * for half2, half3 and half4 by calling the scalar fnc(half, half *) once
 * per lane.  The scalar pointer results go to local temporaries first and are
 * copied into *v2 after all lanes have been processed.  v2 is dereferenced
 * unconditionally.
 */
#define SCALARIZE_HN_FUNC_HN_PHN(fnc)                                 \
extern half2 __attribute__((overloadable)) fnc(half2 v1, half2 *v2) { \
    half2 ret;                                                        \
    half outX;                                                        \
    half outY;                                                        \
    ret.x = fnc(v1.x, &outX);                                         \
    ret.y = fnc(v1.y, &outY);                                         \
    v2->x = outX;                                                     \
    v2->y = outY;                                                     \
    return ret;                                                       \
}                                                                     \
extern half3 __attribute__((overloadable)) fnc(half3 v1, half3 *v2) { \
    half3 ret;                                                        \
    half outX;                                                        \
    half outY;                                                        \
    half outZ;                                                        \
    ret.x = fnc(v1.x, &outX);                                         \
    ret.y = fnc(v1.y, &outY);                                         \
    ret.z = fnc(v1.z, &outZ);                                         \
    v2->x = outX;                                                     \
    v2->y = outY;                                                     \
    v2->z = outZ;                                                     \
    return ret;                                                       \
}                                                                     \
extern half4 __attribute__((overloadable)) fnc(half4 v1, half4 *v2) { \
    half4 ret;                                                        \
    half outX;                                                        \
    half outY;                                                        \
    half outZ;                                                        \
    half outW;                                                        \
    ret.x = fnc(v1.x, &outX);                                         \
    ret.y = fnc(v1.y, &outY);                                         \
    ret.z = fnc(v1.z, &outZ);                                         \
    ret.w = fnc(v1.w, &outW);                                         \
    v2->x = outX;                                                     \
    v2->y = outY;                                                     \
    v2->z = outZ;                                                     \
    v2->w = outW;                                                     \
    return ret;                                                       \
}
1849 
/* Define f16 functions of the form
 *     HN output = fn(HN input1, HN input2)
 * where HN is a vector half type (half2, half3, half4).  Each overload calls
 * the scalar function of the same name once per lane and assembles the
 * results.
 *
 * The definition ends on its closing brace with no trailing line
 * continuation, so the line after the macro is never absorbed into it.
 */
#define SCALARIZE_HN_FUNC_HN_HN(fn)                                       \
extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2) {       \
  half2 ret;                                                              \
  ret.x = fn(v1.x, v2.x);                                                 \
  ret.y = fn(v1.y, v2.y);                                                 \
  return ret;                                                             \
}                                                                         \
extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2) {       \
  half3 ret;                                                              \
  ret.x = fn(v1.x, v2.x);                                                 \
  ret.y = fn(v1.y, v2.y);                                                 \
  ret.z = fn(v1.z, v2.z);                                                 \
  return ret;                                                             \
}                                                                         \
extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2) {       \
  half4 ret;                                                              \
  ret.x = fn(v1.x, v2.x);                                                 \
  ret.y = fn(v1.y, v2.y);                                                 \
  ret.z = fn(v1.z, v2.z);                                                 \
  ret.w = fn(v1.w, v2.w);                                                 \
  return ret;                                                             \
}
1877 
1878 HN_FUNC_HN(acos);
1879 HN_FUNC_HN(acosh);
1880 HN_FUNC_HN(acospi);
1881 HN_FUNC_HN(asin);
1882 HN_FUNC_HN(asinh);
1883 HN_FUNC_HN(asinpi);
1884 HN_FUNC_HN(atan);
1885 HN_FUNC_HN(atanh);
1886 HN_FUNC_HN(atanpi);
1887 HN_FUNC_HN_HN(atan2);
1888 HN_FUNC_HN_HN(atan2pi);
1889 
1890 HN_FUNC_HN(cbrt);
1891 HN_FUNC_HN(ceil);
1892 
1893 extern half __attribute__((overloadable)) copysign(half x, half y);
1894 SCALARIZE_HN_FUNC_HN_HN(copysign);
1895 
1896 HN_FUNC_HN(cos);
1897 HN_FUNC_HN(cosh);
1898 HN_FUNC_HN(cospi);
1899 
cross(half3 lhs,half3 rhs)1900 extern half3 __attribute__((overloadable)) cross(half3 lhs, half3 rhs) {
1901     half3 r;
1902     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1903     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1904     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1905     return r;
1906 }
1907 
cross(half4 lhs,half4 rhs)1908 extern half4 __attribute__((overloadable)) cross(half4 lhs, half4 rhs) {
1909     half4 r;
1910     r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1911     r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1912     r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1913     r.w = 0.f;
1914     return r;
1915 }
1916 
1917 HN_FUNC_HN(degrees);
1918 H_FUNC_HN_HN(distance);
1919 H_FUNC_HN_HN(dot);
1920 
1921 HN_FUNC_HN(erf);
1922 HN_FUNC_HN(erfc);
1923 HN_FUNC_HN(exp);
1924 HN_FUNC_HN(exp10);
1925 HN_FUNC_HN(exp2);
1926 HN_FUNC_HN(expm1);
1927 
1928 HN_FUNC_HN(fabs);
1929 HN_FUNC_HN_HN(fdim);
1930 HN_FUNC_HN(floor);
1931 HN_FUNC_HN_HN_HN(fma);
1932 HN_FUNC_HN_HN(fmax);
1933 HN_FUNC_HN_H(fmax);
1934 HN_FUNC_HN_HN(fmin);
1935 HN_FUNC_HN_H(fmin);
1936 HN_FUNC_HN_HN(fmod);
1937 
fract(half v,half * iptr)1938 extern half __attribute__((overloadable)) fract(half v, half *iptr) {
1939     // maxLessThanOne = 0.99951171875, the largest value < 1.0
1940     half maxLessThanOne;
1941     SET_HALF_WORD(maxLessThanOne, 0x3bff);
1942 
1943     int i = (int) floor(v);
1944     if (iptr) {
1945         *iptr = i;
1946     }
1947     // return v - floor(v), if strictly less than one
1948     return fmin(v - i, maxLessThanOne);
1949 }
1950 
1951 SCALARIZE_HN_FUNC_HN_PHN(fract);
1952 
fract(half v)1953 extern half __attribute__((const, overloadable)) fract(half v) {
1954     half unused;
1955     return fract(v, &unused);
1956 }
1957 
fract(half2 v)1958 extern half2 __attribute__((const, overloadable)) fract(half2 v) {
1959     half2 unused;
1960     return fract(v, &unused);
1961 }
1962 
fract(half3 v)1963 extern half3 __attribute__((const, overloadable)) fract(half3 v) {
1964     half3 unused;
1965     return fract(v, &unused);
1966 }
1967 
fract(half4 v)1968 extern half4 __attribute__((const, overloadable)) fract(half4 v) {
1969     half4 unused;
1970     return fract(v, &unused);
1971 }
1972 
/* Scalar half frexp: declaration only; its definition is not in this part of
 * the file.  The half2/half3/half4 overloads call it once per lane. */
extern half __attribute__((overloadable)) frexp(half x, int *eptr);
1974 
frexp(half2 v1,int2 * eptr)1975 extern half2 __attribute__((overloadable)) frexp(half2 v1, int2 *eptr) {
1976     half2 ret;
1977     int e[2];
1978     ret.x = frexp(v1.x, &e[0]);
1979     ret.y = frexp(v1.y, &e[1]);
1980     eptr->x = e[0];
1981     eptr->y = e[1];
1982     return ret;
1983 }
1984 
frexp(half3 v1,int3 * eptr)1985 extern half3 __attribute__((overloadable)) frexp(half3 v1, int3 *eptr) {
1986     half3 ret;
1987     int e[3];
1988     ret.x = frexp(v1.x, &e[0]);
1989     ret.y = frexp(v1.y, &e[1]);
1990     ret.z = frexp(v1.z, &e[2]);
1991     eptr->x = e[0];
1992     eptr->y = e[1];
1993     eptr->z = e[2];
1994     return ret;
1995 }
1996 
frexp(half4 v1,int4 * eptr)1997 extern half4 __attribute__((overloadable)) frexp(half4 v1, int4 *eptr) {
1998     half4 ret;
1999     int e[4];
2000     ret.x = frexp(v1.x, &e[0]);
2001     ret.y = frexp(v1.y, &e[1]);
2002     ret.z = frexp(v1.z, &e[2]);
2003     ret.w = frexp(v1.w, &e[3]);
2004     eptr->x = e[0];
2005     eptr->y = e[1];
2006     eptr->z = e[2];
2007     eptr->w = e[3];
2008     return ret;
2009 }
2010 
2011 HN_FUNC_HN_HN(hypot);
2012 
2013 extern int __attribute__((overloadable)) ilogb(half x);
2014 
ilogb(half2 v)2015 extern int2 __attribute__((overloadable)) ilogb(half2 v) {
2016     int2 ret;
2017     ret.x = ilogb(v.x);
2018     ret.y = ilogb(v.y);
2019     return ret;
2020 }
ilogb(half3 v)2021 extern int3 __attribute__((overloadable)) ilogb(half3 v) {
2022     int3 ret;
2023     ret.x = ilogb(v.x);
2024     ret.y = ilogb(v.y);
2025     ret.z = ilogb(v.z);
2026     return ret;
2027 }
ilogb(half4 v)2028 extern int4 __attribute__((overloadable)) ilogb(half4 v) {
2029     int4 ret;
2030     ret.x = ilogb(v.x);
2031     ret.y = ilogb(v.y);
2032     ret.z = ilogb(v.z);
2033     ret.w = ilogb(v.w);
2034     return ret;
2035 }
2036 
2037 HN_FUNC_HN_IN(ldexp);
ldexp(half2 v,int exponent)2038 extern half2 __attribute__((overloadable)) ldexp(half2 v, int exponent) {
2039     return convert_half2(ldexp(convert_float2(v), exponent));
2040 }
ldexp(half3 v,int exponent)2041 extern half3 __attribute__((overloadable)) ldexp(half3 v, int exponent) {
2042     return convert_half3(ldexp(convert_float3(v), exponent));
2043 }
ldexp(half4 v,int exponent)2044 extern half4 __attribute__((overloadable)) ldexp(half4 v, int exponent) {
2045     return convert_half4(ldexp(convert_float4(v), exponent));
2046 }
2047 
2048 H_FUNC_HN(length);
2049 HN_FUNC_HN(lgamma);
2050 
lgamma(half h,int * signp)2051 extern half __attribute__((overloadable)) lgamma(half h, int *signp) {
2052     return (half) lgamma((float) h, signp);
2053 }
lgamma(half2 v,int2 * signp)2054 extern half2 __attribute__((overloadable)) lgamma(half2 v, int2 *signp) {
2055     return convert_half2(lgamma(convert_float2(v), signp));
2056 }
lgamma(half3 v,int3 * signp)2057 extern half3 __attribute__((overloadable)) lgamma(half3 v, int3 *signp) {
2058     return convert_half3(lgamma(convert_float3(v), signp));
2059 }
lgamma(half4 v,int4 * signp)2060 extern half4 __attribute__((overloadable)) lgamma(half4 v, int4 *signp) {
2061     return convert_half4(lgamma(convert_float4(v), signp));
2062 }
2063 
2064 HN_FUNC_HN(log);
2065 HN_FUNC_HN(log10);
2066 HN_FUNC_HN(log1p);
2067 HN_FUNC_HN(log2);
2068 HN_FUNC_HN(logb);
2069 
2070 HN_FUNC_HN_HN_HN(mad);
2071 HN_FUNC_HN_HN(max);
2072 HN_FUNC_HN_H(max); // TODO can this be arch-specific similar to _Z3maxDv2_ff?
2073 HN_FUNC_HN_HN(min);
2074 HN_FUNC_HN_H(min); // TODO can this be arch-specific similar to _Z3minDv2_ff?
2075 
mix(half start,half stop,half amount)2076 extern half __attribute__((overloadable)) mix(half start, half stop, half amount) {
2077     return start + (stop - start) * amount;
2078 }
mix(half2 start,half2 stop,half2 amount)2079 extern half2 __attribute__((overloadable)) mix(half2 start, half2 stop, half2 amount) {
2080     return start + (stop - start) * amount;
2081 }
mix(half3 start,half3 stop,half3 amount)2082 extern half3 __attribute__((overloadable)) mix(half3 start, half3 stop, half3 amount) {
2083     return start + (stop - start) * amount;
2084 }
mix(half4 start,half4 stop,half4 amount)2085 extern half4 __attribute__((overloadable)) mix(half4 start, half4 stop, half4 amount) {
2086     return start + (stop - start) * amount;
2087 }
mix(half2 start,half2 stop,half amount)2088 extern half2 __attribute__((overloadable)) mix(half2 start, half2 stop, half amount) {
2089     return start + (stop - start) * amount;
2090 }
mix(half3 start,half3 stop,half amount)2091 extern half3 __attribute__((overloadable)) mix(half3 start, half3 stop, half amount) {
2092     return start + (stop - start) * amount;
2093 }
mix(half4 start,half4 stop,half amount)2094 extern half4 __attribute__((overloadable)) mix(half4 start, half4 stop, half amount) {
2095     return start + (stop - start) * amount;
2096 }
2097 
2098 extern half __attribute__((overloadable)) modf(half x, half *iptr);
2099 SCALARIZE_HN_FUNC_HN_PHN(modf);
2100 
nan_half()2101 half __attribute__((overloadable)) nan_half() {
2102   unsigned short nan_short = kHalfPositiveInfinity | 0x0200;
2103   half nan;
2104   SET_HALF_WORD(nan, nan_short);
2105   return nan;
2106 }
2107 
2108 HN_FUNC_HN(normalize);
2109 
2110 extern half __attribute__((overloadable)) nextafter(half x, half y);
2111 SCALARIZE_HN_FUNC_HN_HN(nextafter);
2112 
2113 HN_FUNC_HN_HN(pow);
2114 HN_FUNC_HN_IN(pown);
2115 HN_FUNC_HN_HN(powr);
2116 HN_FUNC_HN(radians);
2117 HN_FUNC_HN_HN(remainder);
2118 
remquo(half n,half d,int * quo)2119 extern half __attribute__((overloadable)) remquo(half n, half d, int *quo) {
2120     return (float) remquo((float) n, (float) d, quo);
2121 }
remquo(half2 n,half2 d,int2 * quo)2122 extern half2 __attribute__((overloadable)) remquo(half2 n, half2 d, int2 *quo) {
2123     return convert_half2(remquo(convert_float2(d), convert_float2(n), quo));
2124 }
remquo(half3 n,half3 d,int3 * quo)2125 extern half3 __attribute__((overloadable)) remquo(half3 n, half3 d, int3 *quo) {
2126     return convert_half3(remquo(convert_float3(d), convert_float3(n), quo));
2127 }
remquo(half4 n,half4 d,int4 * quo)2128 extern half4 __attribute__((overloadable)) remquo(half4 n, half4 d, int4 *quo) {
2129     return convert_half4(remquo(convert_float4(d), convert_float4(n), quo));
2130 }
2131 
2132 HN_FUNC_HN(rint);
2133 HN_FUNC_HN_IN(rootn);
2134 HN_FUNC_HN(round);
2135 HN_FUNC_HN(rsqrt);
2136 
sign(half h)2137 extern half __attribute__((overloadable)) sign(half h) {
2138     if (h > 0) return (half) 1.f;
2139     if (h < 0) return (half) -1.f;
2140     return h;
2141 }
sign(half2 v)2142 extern half2 __attribute__((overloadable)) sign(half2 v) {
2143     half2 ret;
2144     ret.x = sign(v.x);
2145     ret.y = sign(v.y);
2146     return ret;
2147 }
sign(half3 v)2148 extern half3 __attribute__((overloadable)) sign(half3 v) {
2149     half3 ret;
2150     ret.x = sign(v.x);
2151     ret.y = sign(v.y);
2152     ret.z = sign(v.z);
2153     return ret;
2154 }
sign(half4 v)2155 extern half4 __attribute__((overloadable)) sign(half4 v) {
2156     half4 ret;
2157     ret.x = sign(v.x);
2158     ret.y = sign(v.y);
2159     ret.z = sign(v.z);
2160     ret.w = sign(v.w);
2161     return ret;
2162 }
2163 
// Instantiate half-precision sin through the HN_FUNC_HN helper macro (defined
// earlier in this file); the sincos overloads below call it.
HN_FUNC_HN(sin);
2165 
sincos(half v,half * cosptr)2166 extern half __attribute__((overloadable)) sincos(half v, half *cosptr) {
2167     *cosptr = cos(v);
2168     return sin(v);
2169 }
2170 // TODO verify if LLVM eliminates the duplicate convert_float2
sincos(half2 v,half2 * cosptr)2171 extern half2 __attribute__((overloadable)) sincos(half2 v, half2 *cosptr) {
2172     *cosptr = cos(v);
2173     return sin(v);
2174 }
sincos(half3 v,half3 * cosptr)2175 extern half3 __attribute__((overloadable)) sincos(half3 v, half3 *cosptr) {
2176     *cosptr = cos(v);
2177     return sin(v);
2178 }
sincos(half4 v,half4 * cosptr)2179 extern half4 __attribute__((overloadable)) sincos(half4 v, half4 *cosptr) {
2180     *cosptr = cos(v);
2181     return sin(v);
2182 }
2183 
// Instantiate half-precision sinh, sinpi and sqrt through the HN_FUNC_HN
// helper macro (defined earlier in this file).
HN_FUNC_HN(sinh);
HN_FUNC_HN(sinpi);
HN_FUNC_HN(sqrt);
2187 
step(half edge,half v)2188 extern half __attribute__((overloadable)) step(half edge, half v) {
2189     return (v < edge) ? 0.f : 1.f;
2190 }
step(half2 edge,half2 v)2191 extern half2 __attribute__((overloadable)) step(half2 edge, half2 v) {
2192     half2 r;
2193     r.x = (v.x < edge.x) ? 0.f : 1.f;
2194     r.y = (v.y < edge.y) ? 0.f : 1.f;
2195     return r;
2196 }
step(half3 edge,half3 v)2197 extern half3 __attribute__((overloadable)) step(half3 edge, half3 v) {
2198     half3 r;
2199     r.x = (v.x < edge.x) ? 0.f : 1.f;
2200     r.y = (v.y < edge.y) ? 0.f : 1.f;
2201     r.z = (v.z < edge.z) ? 0.f : 1.f;
2202     return r;
2203 }
step(half4 edge,half4 v)2204 extern half4 __attribute__((overloadable)) step(half4 edge, half4 v) {
2205     half4 r;
2206     r.x = (v.x < edge.x) ? 0.f : 1.f;
2207     r.y = (v.y < edge.y) ? 0.f : 1.f;
2208     r.z = (v.z < edge.z) ? 0.f : 1.f;
2209     r.w = (v.w < edge.w) ? 0.f : 1.f;
2210     return r;
2211 }
step(half2 edge,half v)2212 extern half2 __attribute__((overloadable)) step(half2 edge, half v) {
2213     half2 r;
2214     r.x = (v < edge.x) ? 0.f : 1.f;
2215     r.y = (v < edge.y) ? 0.f : 1.f;
2216     return r;
2217 }
step(half3 edge,half v)2218 extern half3 __attribute__((overloadable)) step(half3 edge, half v) {
2219     half3 r;
2220     r.x = (v < edge.x) ? 0.f : 1.f;
2221     r.y = (v < edge.y) ? 0.f : 1.f;
2222     r.z = (v < edge.z) ? 0.f : 1.f;
2223     return r;
2224 }
step(half4 edge,half v)2225 extern half4 __attribute__((overloadable)) step(half4 edge, half v) {
2226     half4 r;
2227     r.x = (v < edge.x) ? 0.f : 1.f;
2228     r.y = (v < edge.y) ? 0.f : 1.f;
2229     r.z = (v < edge.z) ? 0.f : 1.f;
2230     r.w = (v < edge.w) ? 0.f : 1.f;
2231     return r;
2232 }
step(half edge,half2 v)2233 extern half2 __attribute__((overloadable)) step(half edge, half2 v) {
2234     half2 r;
2235     r.x = (v.x < edge) ? 0.f : 1.f;
2236     r.y = (v.y < edge) ? 0.f : 1.f;
2237     return r;
2238 }
step(half edge,half3 v)2239 extern half3 __attribute__((overloadable)) step(half edge, half3 v) {
2240     half3 r;
2241     r.x = (v.x < edge) ? 0.f : 1.f;
2242     r.y = (v.y < edge) ? 0.f : 1.f;
2243     r.z = (v.z < edge) ? 0.f : 1.f;
2244     return r;
2245 }
step(half edge,half4 v)2246 extern half4 __attribute__((overloadable)) step(half edge, half4 v) {
2247     half4 r;
2248     r.x = (v.x < edge) ? 0.f : 1.f;
2249     r.y = (v.y < edge) ? 0.f : 1.f;
2250     r.z = (v.z < edge) ? 0.f : 1.f;
2251     r.w = (v.w < edge) ? 0.f : 1.f;
2252     return r;
2253 }
2254 
// Instantiate half-precision tan, tanh, tanpi, tgamma and trunc through the
// HN_FUNC_HN helper macro (defined earlier in this file).
HN_FUNC_HN(tan);
HN_FUNC_HN(tanh);
HN_FUNC_HN(tanpi);
HN_FUNC_HN(tgamma);
HN_FUNC_HN(trunc); // TODO: rethink: needs half-specific implementation?
2260 
// Half-precision native_* entry points, instantiated with the same helper
// macros as the non-native functions above.
// NOTE(review): the macros are defined earlier in this file; what precision
// the generated native_* half functions actually provide cannot be seen from
// here -- confirm against the macro definitions.
HN_FUNC_HN(native_acos);
HN_FUNC_HN(native_acosh);
HN_FUNC_HN(native_acospi);
HN_FUNC_HN(native_asin);
HN_FUNC_HN(native_asinh);
HN_FUNC_HN(native_asinpi);
HN_FUNC_HN(native_atan);
HN_FUNC_HN(native_atanh);
HN_FUNC_HN(native_atanpi);
HN_FUNC_HN_HN(native_atan2);
HN_FUNC_HN_HN(native_atan2pi);

HN_FUNC_HN(native_cbrt);
HN_FUNC_HN(native_cos);
HN_FUNC_HN(native_cosh);
HN_FUNC_HN(native_cospi);

// NOTE(review): the H_FUNC_* variants (used for distance and length)
// presumably return a scalar half for any vector width -- confirm.
H_FUNC_HN_HN(native_distance);
HN_FUNC_HN_HN(native_divide);

HN_FUNC_HN(native_exp);
HN_FUNC_HN(native_exp10);
HN_FUNC_HN(native_exp2);
HN_FUNC_HN(native_expm1);

HN_FUNC_HN_HN(native_hypot);
H_FUNC_HN(native_length);

HN_FUNC_HN(native_log);
HN_FUNC_HN(native_log10);
HN_FUNC_HN(native_log1p);
HN_FUNC_HN(native_log2);

HN_FUNC_HN(native_normalize);

HN_FUNC_HN_HN(native_powr); // TODO are parameter limits different for half?

HN_FUNC_HN(native_recip);
HN_FUNC_HN_IN(native_rootn);
HN_FUNC_HN(native_rsqrt);

HN_FUNC_HN(native_sin);
2303 
native_sincos(half v,half * cosptr)2304 extern half __attribute__((overloadable)) native_sincos(half v, half *cosptr) {
2305     return sincos(v, cosptr);
2306 }
native_sincos(half2 v,half2 * cosptr)2307 extern half2 __attribute__((overloadable)) native_sincos(half2 v, half2 *cosptr) {
2308     return sincos(v, cosptr);
2309 }
native_sincos(half3 v,half3 * cosptr)2310 extern half3 __attribute__((overloadable)) native_sincos(half3 v, half3 *cosptr) {
2311     return sincos(v, cosptr);
2312 }
native_sincos(half4 v,half4 * cosptr)2313 extern half4 __attribute__((overloadable)) native_sincos(half4 v, half4 *cosptr) {
2314     return sincos(v, cosptr);
2315 }
2316 
// Remaining half-precision native_* instantiations (hyperbolic sine, sinpi,
// sqrt and the tangent family), generated through the HN_FUNC_HN helper macro
// defined earlier in this file.
HN_FUNC_HN(native_sinh);
HN_FUNC_HN(native_sinpi);
HN_FUNC_HN(native_sqrt);

HN_FUNC_HN(native_tan);
HN_FUNC_HN(native_tanh);
HN_FUNC_HN(native_tanpi);
2324 
// The half helper macros are not used past this point; undefine them so they
// do not stay visible in the remainder of the translation unit.
#undef HN_FUNC_HN
#undef HN_FUNC_HN_HN
#undef HN_FUNC_HN_H
#undef HN_FUNC_HN_HN_HN
#undef HN_FUNC_HN_IN
#undef H_FUNC_HN
#undef H_FUNC_HN_HN
#undef SCALARIZE_HN_FUNC_HN_HN
2333 
2334 // exports unavailable mathlib functions to compat lib
2335 
2336 #ifdef RS_COMPATIBILITY_LIB
2337 
2338 // !!! DANGER !!!
2339 // These functions are potentially missing on older Android versions.
2340 // Work around the issue by supplying our own variants.
2341 // !!! DANGER !!!
2342 
2343 // The logbl() implementation is taken from the latest bionic/, since
2344 // double == long double on Android.
logbl(long double x)2345 extern "C" long double logbl(long double x) { return logb(x); }
2346 
// __aeabi_idiv0 is a missing function in libcompiler_rt.so, so we just
// pick the simplest implementation based on the ARM EABI doc: return the
// argument unchanged.
//
// extern "C" is a C++-only linkage specification and is a syntax error in a
// C translation unit, so it is emitted only when compiling as C++. In C the
// function already has C linkage, so the exported symbol is the same.
#ifdef __cplusplus
extern "C"
#endif
int __aeabi_idiv0(int v) { return v; }
2350 
2351 #endif // compatibility lib
2352