1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "shared.rsh"
18 
// Half-precision operands/result sinks used by the TEST_* macros below, one
// per vector width (scalar, 2, 3 and 4 components).
// NOTE(review): presumably volatile so that the calls made in testAPI() are
// not optimized away -- confirm.
static volatile half h1;
static volatile half2 h2;
static volatile half3 h3;
static volatile half4 h4;

// Integer counterparts, used for int-typed arguments, int results and
// int-pointer out-parameters in the TEST_* macros below.
static volatile int i1;
static volatile int2 i2;
static volatile int3 i3;
static volatile int4 i4;
28 
/*
 * Calls fn once on each of the globals h1..h4 and stores every result back
 * into the global it was computed from.  Wrapped in do/while (0), like
 * SET_HALF_WORD, so the expansion behaves as a single statement.
 */
#define TEST_HN_FUNC_HN(fn) \
do {                        \
    h1 = fn(h1);            \
    h2 = fn(h2);            \
    h3 = fn(h3);            \
    h4 = fn(h4);            \
} while (0)
34 
/*
 * Calls fn once on each of the globals h1..h4 and stores the results in the
 * integer globals i1..i4 of matching width.  Wrapped in do/while (0) so the
 * expansion behaves as a single statement.
 */
#define TEST_IN_FUNC_HN(fn) \
do {                        \
    i1 = fn(h1);            \
    i2 = fn(h2);            \
    i3 = fn(h3);            \
    i4 = fn(h4);            \
} while (0)
40 
/*
 * Calls the two-argument fn with the same global for both arguments, once per
 * width (h1..h4), storing each result back into that global.  Wrapped in
 * do/while (0) so the expansion behaves as a single statement.
 */
#define TEST_HN_FUNC_HN_HN(fn)  \
do {                            \
    h1 = fn(h1, h1);            \
    h2 = fn(h2, h2);            \
    h3 = fn(h3, h3);            \
    h4 = fn(h4, h4);            \
} while (0)
46 
/*
 * Calls fn(hN, iN) for each width, pairing every half global with the integer
 * global of the same width, and stores the result back into hN.  Wrapped in
 * do/while (0) so the expansion behaves as a single statement.
 */
#define TEST_HN_FUNC_HN_IN(fn)  \
do {                            \
    h1 = fn(h1, i1);            \
    h2 = fn(h2, i2);            \
    h3 = fn(h3, i3);            \
    h4 = fn(h4, i4);            \
} while (0)
52 
/*
 * Calls fn(hN, &iN) for each width, passing the address of the integer global
 * of the same width as the second argument, and stores the result back into
 * hN.  The casts drop the volatile qualifier from the integer globals.
 * Wrapped in do/while (0) so the expansion behaves as a single statement.
 */
#define TEST_HN_FUNC_HN_PIN(fn) \
do {                            \
    h1 = fn(h1, (int *) &i1);   \
    h2 = fn(h2, (int2 *) &i2);  \
    h3 = fn(h3, (int3 *) &i3);  \
    h4 = fn(h4, (int4 *) &i4);  \
} while (0)
58 
/*
 * Calls fn(hN, i1) for each width: the first argument varies in width while
 * the second is always the scalar integer i1.  Each result is stored back
 * into hN.  Wrapped in do/while (0) so the expansion behaves as a single
 * statement.
 */
#define TEST_HN_FUNC_HN_I(fn)  \
do {                           \
    h1 = fn(h1, i1);           \
    h2 = fn(h2, i1);           \
    h3 = fn(h3, i1);           \
    h4 = fn(h4, i1);           \
} while (0)
64 
/*
 * Calls fn(hN, h1) for each width: the first argument varies in width while
 * the second is always the scalar h1.  Each result is stored back into hN;
 * note that h1 is overwritten by the first call, so the later calls see the
 * updated h1.  Wrapped in do/while (0) so the expansion behaves as a single
 * statement.
 */
#define TEST_HN_FUNC_HN_H(fn)  \
do {                           \
    h1 = fn(h1, h1);           \
    h2 = fn(h2, h1);           \
    h3 = fn(h3, h1);           \
    h4 = fn(h4, h1);           \
} while (0)
70 
/*
 * Calls fn(h1, hN) for each width: the first argument is always the scalar h1
 * while the second varies in width.  Each result is stored into hN; h1 is
 * overwritten by the first call, so the later calls see the updated h1.
 * Wrapped in do/while (0) so the expansion behaves as a single statement, and
 * without a dangling line continuation after the last line.
 */
#define TEST_HN_FUNC_H_HN(fn)  \
do {                           \
    h1 = fn(h1, h1);           \
    h2 = fn(h1, h2);           \
    h3 = fn(h1, h3);           \
    h4 = fn(h1, h4);           \
} while (0)
76 
/*
 * Calls fn(hN, &hN) for each width, passing the address of the same global as
 * the second argument, and stores the result back into hN.  The casts drop
 * the volatile qualifier from the half globals.  Wrapped in do/while (0) so
 * the expansion behaves as a single statement, and without a dangling line
 * continuation after the last line.
 */
#define TEST_HN_FUNC_HN_PHN(fn) \
do {                            \
    h1 = fn(h1, (half *) &h1);  \
    h2 = fn(h2, (half2 *) &h2); \
    h3 = fn(h3, (half3 *) &h3); \
    h4 = fn(h4, (half4 *) &h4); \
} while (0)
82 
/*
 * Calls the three-argument fn with the same global for all three arguments,
 * once per width (h1..h4), storing each result back into that global.
 * Wrapped in do/while (0) so the expansion behaves as a single statement.
 */
#define TEST_HN_FUNC_HN_HN_HN(fn)   \
do {                                \
    h1 = fn(h1, h1, h1);            \
    h2 = fn(h2, h2, h2);            \
    h3 = fn(h3, h3, h3);            \
    h4 = fn(h4, h4, h4);            \
} while (0)
88 
/*
 * Calls fn(hN, hN, h1) for each width: the first two arguments vary in width
 * while the third is always the scalar h1.  Each result is stored back into
 * hN; h1 is overwritten by the first call, so the later calls see the updated
 * h1.  Wrapped in do/while (0) so the expansion behaves as a single
 * statement.
 */
#define TEST_HN_FUNC_HN_HN_H(fn)   \
do {                               \
    h1 = fn(h1, h1, h1);           \
    h2 = fn(h2, h2, h1);           \
    h3 = fn(h3, h3, h1);           \
    h4 = fn(h4, h4, h1);           \
} while (0)
94 
/*
 * Calls fn(hN, hN, &iN) for each width, passing the address of the integer
 * global of the same width as the third argument, and stores the result back
 * into hN.  The casts drop the volatile qualifier from the integer globals.
 * Wrapped in do/while (0) so the expansion behaves as a single statement.
 */
#define TEST_HN_FUNC_HN_HN_PIN(fn) \
do {                               \
    h1 = fn(h1, h1, (int *) &i1);  \
    h2 = fn(h2, h2, (int2 *) &i2); \
    h3 = fn(h3, h3, (int3 *) &i3); \
    h4 = fn(h4, h4, (int4 *) &i4); \
} while (0)
100 
/*
 * Calls fn once on each of h1..h4 and stores every result in the scalar h1,
 * so after the expansion h1 holds fn(h4).  Wrapped in do/while (0) so the
 * expansion behaves as a single statement.
 */
#define TEST_H_FUNC_HN(fn)  \
do {                        \
    h1 = fn(h1);            \
    h1 = fn(h2);            \
    h1 = fn(h3);            \
    h1 = fn(h4);            \
} while (0)
106 
/*
 * Calls fn(hN, hN) for each width and stores every result in the scalar h1,
 * so after the expansion h1 holds fn(h4, h4).  Wrapped in do/while (0) so the
 * expansion behaves as a single statement.
 */
#define TEST_H_FUNC_HN_HN(fn) \
do {                          \
    h1 = fn(h1, h1);          \
    h1 = fn(h2, h2);          \
    h1 = fn(h3, h3);          \
    h1 = fn(h4, h4);          \
} while (0)
112 
/*
 * Invokes every listed math function through the TEST_* macros, using the
 * file-scope volatile globals (h1..h4, i1..i4) as arguments and result sinks.
 * None of the results are checked and the function always returns true.
 * NOTE(review): presumably this only verifies that each half overload
 * compiles, links and runs without crashing -- confirm.
 */
static bool testAPI() {
    TEST_HN_FUNC_HN(acos);
    TEST_HN_FUNC_HN(acosh);
    TEST_HN_FUNC_HN(acospi);

    TEST_HN_FUNC_HN(asin);
    TEST_HN_FUNC_HN(asinh);
    TEST_HN_FUNC_HN(asinpi);

    TEST_HN_FUNC_HN(atan);
    TEST_HN_FUNC_HN_HN(atan2);
    TEST_HN_FUNC_HN_HN(atan2pi);
    TEST_HN_FUNC_HN(atanh);
    TEST_HN_FUNC_HN(atanpi);

    TEST_HN_FUNC_HN(cbrt);
    TEST_HN_FUNC_HN(ceil);
    TEST_HN_FUNC_HN_HN(copysign);

    TEST_HN_FUNC_HN(cos);
    TEST_HN_FUNC_HN(cosh);
    TEST_HN_FUNC_HN(cospi);

    TEST_HN_FUNC_HN(degrees);
    TEST_HN_FUNC_HN(erf);
    TEST_HN_FUNC_HN(erfc);
    TEST_HN_FUNC_HN(exp);
    TEST_HN_FUNC_HN(exp10);
    TEST_HN_FUNC_HN(exp2);
    TEST_HN_FUNC_HN(expm1);

    TEST_HN_FUNC_HN(fabs);
    TEST_HN_FUNC_HN_HN(fdim);
    TEST_HN_FUNC_HN(floor);
    TEST_HN_FUNC_HN_HN_HN(fma);

    // fmax/fmin are exercised with both a vector and a scalar second argument.
    TEST_HN_FUNC_HN_HN(fmax);
    TEST_HN_FUNC_HN_H(fmax);
    TEST_HN_FUNC_HN_HN(fmin);
    TEST_HN_FUNC_HN_H(fmin);
    TEST_HN_FUNC_HN_HN(fmod);

    // fract is exercised in its one-argument and pointer-argument forms.
    TEST_HN_FUNC_HN(fract);
    TEST_HN_FUNC_HN_PHN(fract);
    TEST_HN_FUNC_HN_PIN(frexp);

    TEST_HN_FUNC_HN_HN(hypot);
    TEST_IN_FUNC_HN(ilogb);
    // ldexp is exercised with both a vector and a scalar int second argument.
    TEST_HN_FUNC_HN_IN(ldexp);
    TEST_HN_FUNC_HN_I(ldexp);
    // lgamma is exercised in its one-argument and int-pointer forms.
    TEST_HN_FUNC_HN(lgamma);
    TEST_HN_FUNC_HN_PIN(lgamma);

    TEST_HN_FUNC_HN(log);
    TEST_HN_FUNC_HN(log10);
    TEST_HN_FUNC_HN(log1p);
    TEST_HN_FUNC_HN(log2);
    TEST_HN_FUNC_HN(logb);

    TEST_HN_FUNC_HN_HN_HN(mad);
    TEST_HN_FUNC_HN_HN(max);
    TEST_HN_FUNC_HN_H(max);
    TEST_HN_FUNC_HN_HN(min);
    TEST_HN_FUNC_HN_H(min);
    TEST_HN_FUNC_HN_HN_HN(mix);
    TEST_HN_FUNC_HN_HN_H(mix);
    TEST_HN_FUNC_HN_PHN(modf);

    // nan_half takes no arguments, so it is called directly rather than
    // through one of the TEST_* macros.
    h1 = nan_half();

    TEST_HN_FUNC_HN(native_acos);
    TEST_HN_FUNC_HN(native_acosh);
    TEST_HN_FUNC_HN(native_acospi);

    TEST_HN_FUNC_HN(native_asin);
    TEST_HN_FUNC_HN(native_asinh);
    TEST_HN_FUNC_HN(native_asinpi);

    TEST_HN_FUNC_HN(native_atan);
    TEST_HN_FUNC_HN_HN(native_atan2);
    TEST_HN_FUNC_HN_HN(native_atan2pi);
    TEST_HN_FUNC_HN(native_atanh);
    TEST_HN_FUNC_HN(native_atanpi);

    TEST_HN_FUNC_HN(native_cbrt);
    TEST_HN_FUNC_HN(native_cos);
    TEST_HN_FUNC_HN(native_cosh);
    TEST_HN_FUNC_HN(native_cospi);

    TEST_HN_FUNC_HN_HN(native_divide);
    TEST_HN_FUNC_HN(native_exp);
    TEST_HN_FUNC_HN(native_exp10);
    TEST_HN_FUNC_HN(native_exp2);
    TEST_HN_FUNC_HN(native_expm1);

    TEST_HN_FUNC_HN_HN(native_hypot);
    TEST_H_FUNC_HN(native_length);
    TEST_HN_FUNC_HN(native_log);
    TEST_HN_FUNC_HN(native_log10);
    TEST_HN_FUNC_HN(native_log1p);
    TEST_HN_FUNC_HN(native_log2);

    TEST_HN_FUNC_HN_HN(native_powr);
    TEST_HN_FUNC_HN(native_recip);
    TEST_HN_FUNC_HN_IN(native_rootn);
    TEST_HN_FUNC_HN(native_rsqrt);

    TEST_HN_FUNC_HN(native_sin);
    TEST_HN_FUNC_HN_PHN(native_sincos);
    TEST_HN_FUNC_HN(native_sinh);
    TEST_HN_FUNC_HN(native_sinpi);

    TEST_HN_FUNC_HN(native_tan);
    TEST_HN_FUNC_HN(native_tanh);
    TEST_HN_FUNC_HN(native_tanpi);

    TEST_HN_FUNC_HN_HN(nextafter);
    TEST_HN_FUNC_HN_HN(pow);
    TEST_HN_FUNC_HN_IN(pown);
    TEST_HN_FUNC_HN_HN(powr);

    TEST_HN_FUNC_HN(radians);
    TEST_HN_FUNC_HN_HN(remainder);
    TEST_HN_FUNC_HN_HN_PIN(remquo);
    TEST_HN_FUNC_HN(rint);
    TEST_HN_FUNC_HN_IN(rootn);
    TEST_HN_FUNC_HN(round);
    TEST_HN_FUNC_HN(rsqrt);

    TEST_HN_FUNC_HN(sign);
    TEST_HN_FUNC_HN(sin);
    TEST_HN_FUNC_HN_PHN(sincos);
    TEST_HN_FUNC_HN(sinh);
    TEST_HN_FUNC_HN(sinpi);
    TEST_HN_FUNC_HN(sqrt);

    // step is exercised with vector/vector, vector/scalar and scalar/vector
    // argument combinations.
    TEST_HN_FUNC_HN_HN(step);
    TEST_HN_FUNC_HN_H(step);
    TEST_HN_FUNC_H_HN(step);

    TEST_HN_FUNC_HN(tan);
    TEST_HN_FUNC_HN(tanh);
    TEST_HN_FUNC_HN(tanpi);

    TEST_HN_FUNC_HN(tgamma);
    TEST_HN_FUNC_HN(trunc);

    // Vector math functions
    // cross is only called on the 3- and 4-component globals.
    h3 = cross(h3, h3);
    h4 = cross(h4, h4);

    TEST_H_FUNC_HN_HN(distance);
    TEST_H_FUNC_HN_HN(dot);
    TEST_H_FUNC_HN(length);
    TEST_H_FUNC_HN_HN(native_distance);
    TEST_H_FUNC_HN(native_length);
    TEST_HN_FUNC_HN(native_normalize);
    TEST_HN_FUNC_HN(normalize);
    return true;
}
273 
274 typedef union {
275   half hval;
276   short sval;
277 } fp16_shape_type;
278 
/*
 * Reinterprets the integer bit pattern s as a half and assigns it to h,
 * going through the fp16_shape_type union.
 */
#define SET_HALF_WORD(h, s)                          \
do {                                                 \
  const fp16_shape_type halfBits_ = { .sval = (s) }; \
  (h) = halfBits_.hval;                              \
} while (0)
286 
/*
 * Calls frexp on (half) inp and asserts, via _RS_ASSERT_EQU, that the
 * returned value equals (half) ref and that the exponent written through the
 * pointer equals refExp.
 * NOTE(review): presumably _RS_ASSERT_EQU (from shared.rsh) records a
 * mismatch in a caller-scope variable named `failed` -- confirm.
 *
 * No semicolon follows `while (0)`: the caller supplies it, which keeps the
 * macro usable as a single statement (e.g. in an if/else).
 */
#define VALIDATE_FREXP_HALF(inp, ref, refExp)  \
do {                                           \
    int exp;                                   \
    half out = frexp(((half) (inp)), &exp);    \
    _RS_ASSERT_EQU(out, ((half) (ref)));       \
    _RS_ASSERT_EQU(exp, (refExp));             \
} while (0)
294 
295 static bool testFrexp() {
296     bool failed= false;
297 
298     VALIDATE_FREXP_HALF(0, 0, 0);
299     VALIDATE_FREXP_HALF(-0, -0, 0);
300     VALIDATE_FREXP_HALF(1, 0.5, 1);
301     VALIDATE_FREXP_HALF(0.25, 0.5, -1);
302     VALIDATE_FREXP_HALF(1.5, 0.75, 1);
303     VALIDATE_FREXP_HALF(1.99, 0.995, 1);
304 
305     return !failed;
306 }
307 
// Calls modf on (half) inp and asserts that the returned value equals
// (half) ref and that the integral part written through the pointer equals
// (half) refIntPart.
//
// Sentinel values are placed on either side of the *intPart parameter to modf
// to ensure that the call writes to just the 2 bytes pointed to by the
// parameter.
//
// No semicolon follows `while (0)`: the caller supplies it, which keeps the
// macro usable as a single statement (e.g. in an if/else).
#define VALIDATE_MODF_HALF(inp, ref, refIntPart)       \
do {                                                   \
    half intPart[3];                                   \
    intPart[0] = (half) 42.0f;                         \
    intPart[2] = (half) 3.14f;                         \
    half out = modf(((half) (inp)), &intPart[1]);      \
    _RS_ASSERT_EQU(out, ((half) (ref)));               \
    _RS_ASSERT_EQU(intPart[1], ((half) (refIntPart))); \
    _RS_ASSERT_EQU(intPart[0], (half) 42.0f);          \
    _RS_ASSERT_EQU(intPart[2], (half) 3.14f);          \
} while (0)
321 
/*
 * Checks modf(half, half *) against fixed input / fractional-part /
 * integral-part triples, for positive and negative inputs.  Returns true if
 * `failed` is still false after all checks.
 * NOTE(review): presumably VALIDATE_MODF_HALF sets `failed` through
 * _RS_ASSERT_EQU on a mismatch -- confirm against shared.rsh.
 */
static bool testModf() {
    bool failed = false;

    // Positive inputs.
    VALIDATE_MODF_HALF(0.5, 0.5, 0.0);
    VALIDATE_MODF_HALF(1.5, 0.5, 1.0);
    VALIDATE_MODF_HALF(100.5625, 0.5625, 100.0);

    // The same magnitudes with a negative sign; both parts are expected to
    // carry the sign of the input.
    VALIDATE_MODF_HALF(-0.5, -0.5, -0.0);
    VALIDATE_MODF_HALF(-1.5, -0.5, -1.0);
    VALIDATE_MODF_HALF(-100.5625, -0.5625, -100.0);

    return !failed;
}
335 
336 static bool testNextAfter() {
337     half zero, minSubNormal, maxSubNormal, minNormal, infinity;
338     half negativeZero, negativeInfinity;
339     half negativeMinSubNormal, negativeMaxSubNormal, negativeMinNormal;
340 
341     // TODO Define these constants so the SET_HALF_WORD macro is unnecessary.
342     SET_HALF_WORD(zero, 0x0000);
343     SET_HALF_WORD(minSubNormal, 0x0001);
344     SET_HALF_WORD(maxSubNormal, 0x03ff);
345     SET_HALF_WORD(minNormal, 0x0400);
346     SET_HALF_WORD(infinity, 0x7c00);
347 
348     SET_HALF_WORD(negativeZero, 0x7000);
349     SET_HALF_WORD(negativeMinSubNormal, 0x8001);
350     SET_HALF_WORD(negativeMaxSubNormal, 0x83ff);
351     SET_HALF_WORD(negativeMinNormal, 0x8400);
352     SET_HALF_WORD(negativeInfinity, 0xfc00);
353 
354     // Number of normal fp16 values:
355     //   All-zero exponent is for zero and subnormals.  All-one exponent is for
356     //   Infinity and NaN.  Hence number of possible values for exponent = 30
357     //
358     //   No. of possible values for mantissa = 2 ^ 10 = 1024
359     //
360     //   Number of positive, non-zero and normal fp16 values = 30 * 1024 = 30720
361     //   Number of negative, non-zero and normal fp16 values = 30 * 1024 = 30720
362     //
363     //   The following tests call nextafter in a loop starting at infinity
364     //   towards the smallest normal and vice versa (for +ve and -ve) and verify
365     //   that the number of loop iterations is 30720.
366 
367     const unsigned int numDistinctExpected = 30720;
368     const unsigned int maxSteps = 31000;
369 
370     unsigned int numDistinct;
371     half h, toward;
372 
373     for (h = minNormal, toward = infinity, numDistinct = 0;
374             numDistinct < maxSteps && h != toward; numDistinct ++) {
375         h = nextafter(h, toward);
376     }
377     if (numDistinct != numDistinctExpected)
378         return false;
379 
380     for (h = infinity, toward = minNormal, numDistinct = 0;
381             numDistinct < maxSteps && h != toward; numDistinct ++) {
382         h = nextafter(h, toward);
383     }
384     if (numDistinct != numDistinctExpected)
385         return false;
386 
387     for (h = negativeMinNormal, toward = negativeInfinity, numDistinct = 0;
388             numDistinct < maxSteps && h != toward; numDistinct ++) {
389         h = nextafter(h, toward);
390     }
391     if (numDistinct != numDistinctExpected)
392         return false;
393 
394     for (h = negativeInfinity, toward = negativeMinNormal, numDistinct = 0;
395             numDistinct < maxSteps && h != toward; numDistinct ++) {
396         h = nextafter(h, toward);
397     }
398     if (numDistinct != numDistinctExpected)
399         return false;
400 
401     // Test nextafter at the boundary of subnormal numbers.  Since RenderScript
402     // doesn't require implementations to handle FP16 subnormals correctly,
403     // allow nextafter to return a valid normal number that satisfies the
404     // constraints of nextafter.
405 
406     // nextafter(0, infinity) = minnormal or minsubnormal
407     h = nextafter(zero, infinity);
408     if (h != minSubNormal && h != minNormal)
409         return false;
410     h = nextafter(zero, negativeInfinity);
411     if (h != negativeMinSubNormal && h != negativeMinNormal)
412         return false;
413 
414     // nextafter(minNormal, negativeInfinity) = maxSubNormal or zero
415     h = nextafter(minNormal, negativeInfinity);
416     if (h != maxSubNormal && h != zero)
417         return false;
418     h = nextafter(negativeMinNormal, infinity);
419     if (h != negativeMaxSubNormal && h != negativeZero)
420         return false;
421 
422     return true;
423 }
424 
425 static bool testIlogb() {
426     bool failed = false;
427 
428     // Test ilogb for 0, +/- infininty and NaN
429     half infinity, negativeInfinity;
430     SET_HALF_WORD(infinity, 0x7c00);
431     SET_HALF_WORD(negativeInfinity, 0xfc00);
432 
433     _RS_ASSERT_EQU(ilogb((half) 0), 0x80000000);
434     _RS_ASSERT_EQU(ilogb((half) -0), 0x80000000);
435     _RS_ASSERT_EQU(ilogb(infinity), 0x7fffffff);
436     _RS_ASSERT_EQU(ilogb(negativeInfinity), 0x7fffffff);
437     _RS_ASSERT_EQU(ilogb(nan_half()), 0x7fffffff);
438 
439     // ilogb(2^n) = n.  Test at the boundary on either side of 2^n.
440     // Don't test subnormal numbers as implementations are not expected to
441     // handle them.
442     _RS_ASSERT_EQU(ilogb((half) 0.24), -3);
443     _RS_ASSERT_EQU(ilogb((half) 0.26), -2);
444     _RS_ASSERT_EQU(ilogb((half) 0.49), -2);
445     _RS_ASSERT_EQU(ilogb((half) 0.51), -1);
446     _RS_ASSERT_EQU(ilogb((half) 0.99), -1);
447     _RS_ASSERT_EQU(ilogb((half) 1.01), 0);
448     _RS_ASSERT_EQU(ilogb((half) 1.99), 0);
449     _RS_ASSERT_EQU(ilogb((half) 2.01), 1);
450     _RS_ASSERT_EQU(ilogb((half) 1023), 9);
451     _RS_ASSERT_EQU(ilogb((half) 1025), 10);
452 
453     // Result is same irrespective of sign.
454     _RS_ASSERT_EQU(ilogb((half) -0.24), -3);
455     _RS_ASSERT_EQU(ilogb((half) -0.26), -2);
456     _RS_ASSERT_EQU(ilogb((half) -0.49), -2);
457     _RS_ASSERT_EQU(ilogb((half) -0.51), -1);
458     _RS_ASSERT_EQU(ilogb((half) -0.99), -1);
459     _RS_ASSERT_EQU(ilogb((half) -1.01), 0);
460     _RS_ASSERT_EQU(ilogb((half) -1.99), 0);
461     _RS_ASSERT_EQU(ilogb((half) -2.01), 1);
462     _RS_ASSERT_EQU(ilogb((half) -1023), 9);
463     _RS_ASSERT_EQU(ilogb((half) -1025), 10);
464 
465     return !failed;
466 }
467 
468 void testFp16Math() {
469     bool success = true;
470 
471     success &= testAPI();
472     success &= testFrexp();
473     success &= testModf();
474     success &= testNextAfter();
475     success &= testIlogb();
476 
477     if (success) {
478         rsDebug("PASSED", 0);
479     } else {
480         rsDebug("FAILED", 0);
481     }
482 
483     if (success) {
484         rsSendToClientBlocking(RS_MSG_TEST_PASSED);
485     } else {
486         rsSendToClientBlocking(RS_MSG_TEST_FAILED);
487     }
488 }
489