/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package libcore.util;

import static android.annotation.SystemApi.Client.MODULE_LIBRARIES;

import android.annotation.SystemApi;

23 /**
24  * <p>The {@code FP16} class is a wrapper and a utility class to manipulate half-precision 16-bit
25  * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a>
26  * floating point data types (also called fp16 or binary16). A half-precision float can be
27  * created from or converted to single-precision floats, and is stored in a short data type.
28  *
29  * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p>
30  * <ul>
31  * <li>Sign bit: 1 bit</li>
32  * <li>Exponent width: 5 bits</li>
33  * <li>Significand: 10 bits</li>
34  * </ul>
35  *
36  * <p>The format is laid out as follows:</p>
37  * <pre>
38  * 1   11111   1111111111
39  * ^   --^--   -----^----
40  * sign  |          |_______ significand
41  *       |
42  *       -- exponent
43  * </pre>
44  *
45  * <p>Half-precision floating points can be useful to save memory and/or
46  * bandwidth at the expense of range and precision when compared to single-precision
47  * floating points (fp32).</p>
48  * <p>To help you decide whether fp16 is the right storage type for you need, please
49  * refer to the table below that shows the available precision throughout the range of
50  * possible values. The <em>precision</em> column indicates the step size between two
51  * consecutive numbers in a specific part of the range.</p>
52  *
53  * <table summary="Precision of fp16 across the range">
54  *     <tr><th>Range start</th><th>Precision</th></tr>
55  *     <tr><td>0</td><td>1 &frasl; 16,777,216</td></tr>
56  *     <tr><td>1 &frasl; 16,384</td><td>1 &frasl; 16,777,216</td></tr>
57  *     <tr><td>1 &frasl; 8,192</td><td>1 &frasl; 8,388,608</td></tr>
58  *     <tr><td>1 &frasl; 4,096</td><td>1 &frasl; 4,194,304</td></tr>
59  *     <tr><td>1 &frasl; 2,048</td><td>1 &frasl; 2,097,152</td></tr>
60  *     <tr><td>1 &frasl; 1,024</td><td>1 &frasl; 1,048,576</td></tr>
61  *     <tr><td>1 &frasl; 512</td><td>1 &frasl; 524,288</td></tr>
62  *     <tr><td>1 &frasl; 256</td><td>1 &frasl; 262,144</td></tr>
63  *     <tr><td>1 &frasl; 128</td><td>1 &frasl; 131,072</td></tr>
64  *     <tr><td>1 &frasl; 64</td><td>1 &frasl; 65,536</td></tr>
65  *     <tr><td>1 &frasl; 32</td><td>1 &frasl; 32,768</td></tr>
66  *     <tr><td>1 &frasl; 16</td><td>1 &frasl; 16,384</td></tr>
67  *     <tr><td>1 &frasl; 8</td><td>1 &frasl; 8,192</td></tr>
68  *     <tr><td>1 &frasl; 4</td><td>1 &frasl; 4,096</td></tr>
69  *     <tr><td>1 &frasl; 2</td><td>1 &frasl; 2,048</td></tr>
70  *     <tr><td>1</td><td>1 &frasl; 1,024</td></tr>
71  *     <tr><td>2</td><td>1 &frasl; 512</td></tr>
72  *     <tr><td>4</td><td>1 &frasl; 256</td></tr>
73  *     <tr><td>8</td><td>1 &frasl; 128</td></tr>
74  *     <tr><td>16</td><td>1 &frasl; 64</td></tr>
75  *     <tr><td>32</td><td>1 &frasl; 32</td></tr>
76  *     <tr><td>64</td><td>1 &frasl; 16</td></tr>
77  *     <tr><td>128</td><td>1 &frasl; 8</td></tr>
78  *     <tr><td>256</td><td>1 &frasl; 4</td></tr>
79  *     <tr><td>512</td><td>1 &frasl; 2</td></tr>
80  *     <tr><td>1,024</td><td>1</td></tr>
81  *     <tr><td>2,048</td><td>2</td></tr>
82  *     <tr><td>4,096</td><td>4</td></tr>
83  *     <tr><td>8,192</td><td>8</td></tr>
84  *     <tr><td>16,384</td><td>16</td></tr>
85  *     <tr><td>32,768</td><td>32</td></tr>
86  * </table>
87  *
88  * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p>
89  *
90  * @hide
91  */
92 
93 @SystemApi(client = MODULE_LIBRARIES)
94 public final class FP16 {
95     /**
96      * The number of bits used to represent a half-precision float value.
97      *
98      * @hide
99      */
100     @SystemApi(client = MODULE_LIBRARIES)
101     public static final int SIZE = 16;
102 
103     /**
104      * Epsilon is the difference between 1.0 and the next value representable
105      * by a half-precision floating-point.
106      *
107      * @hide
108      */
109     @SystemApi(client = MODULE_LIBRARIES)
110     public static final short EPSILON = (short) 0x1400;
111 
112     /**
113      * Maximum exponent a finite half-precision float may have.
114      *
115      * @hide
116      */
117     @SystemApi(client = MODULE_LIBRARIES)
118     public static final int MAX_EXPONENT = 15;
119     /**
120      * Minimum exponent a normalized half-precision float may have.
121      *
122      * @hide
123      */
124     @SystemApi(client = MODULE_LIBRARIES)
125     public static final int MIN_EXPONENT = -14;
126 
127     /**
128      * Smallest negative value a half-precision float may have.
129      *
130      * @hide
131      */
132     @SystemApi(client = MODULE_LIBRARIES)
133     public static final short LOWEST_VALUE = (short) 0xfbff;
134     /**
135      * Maximum positive finite value a half-precision float may have.
136      *
137      * @hide
138      */
139     @SystemApi(client = MODULE_LIBRARIES)
140     public static final short MAX_VALUE = (short) 0x7bff;
141     /**
142      * Smallest positive normal value a half-precision float may have.
143      *
144      * @hide
145      */
146     @SystemApi(client = MODULE_LIBRARIES)
147     public static final short MIN_NORMAL = (short) 0x0400;
148     /**
149      * Smallest positive non-zero value a half-precision float may have.
150      *
151      * @hide
152      */
153     @SystemApi(client = MODULE_LIBRARIES)
154     public static final short MIN_VALUE = (short) 0x0001;
155     /**
156      * A Not-a-Number representation of a half-precision float.
157      *
158      * @hide
159      */
160     @SystemApi(client = MODULE_LIBRARIES)
161     public static final short NaN = (short) 0x7e00;
162     /**
163      * Negative infinity of type half-precision float.
164      *
165      * @hide
166      */
167     @SystemApi(client = MODULE_LIBRARIES)
168     public static final short NEGATIVE_INFINITY = (short) 0xfc00;
169     /**
170      * Negative 0 of type half-precision float.
171      *
172      * @hide
173      */
174     @SystemApi(client = MODULE_LIBRARIES)
175     public static final short NEGATIVE_ZERO = (short) 0x8000;
176     /**
177      * Positive infinity of type half-precision float.
178      *
179      * @hide
180      */
181     @SystemApi(client = MODULE_LIBRARIES)
182     public static final short POSITIVE_INFINITY = (short) 0x7c00;
183     /**
184      * Positive 0 of type half-precision float.
185      *
186      * @hide
187      */
188     @SystemApi(client = MODULE_LIBRARIES)
189     public static final short POSITIVE_ZERO = (short) 0x0000;
190 
191     /**
192      * The offset to shift by to obtain the sign bit.
193      *
194      * @hide
195      */
196     @SystemApi(client = MODULE_LIBRARIES)
197     public static final int SIGN_SHIFT                = 15;
198 
199     /**
200      * The offset to shift by to obtain the exponent bits.
201      *
202      * @hide
203      */
204     @SystemApi(client = MODULE_LIBRARIES)
205     public static final int EXPONENT_SHIFT            = 10;
206 
207     /**
208      * The bitmask to AND a number with to obtain the sign bit.
209      *
210      * @hide
211      */
212     @SystemApi(client = MODULE_LIBRARIES)
213     public static final int SIGN_MASK                 = 0x8000;
214 
215     /**
216      * The bitmask to AND a number shifted by {@link #EXPONENT_SHIFT} right, to obtain exponent bits.
217      *
218      * @hide
219      */
220     @SystemApi(client = MODULE_LIBRARIES)
221     public static final int SHIFTED_EXPONENT_MASK     = 0x1f;
222 
223     /**
224      * The bitmask to AND a number with to obtain significand bits.
225      *
226      * @hide
227      */
228     @SystemApi(client = MODULE_LIBRARIES)
229     public static final int SIGNIFICAND_MASK          = 0x3ff;
230 
231     /**
232      * The bitmask to AND with to obtain exponent and significand bits.
233      *
234      * @hide
235      */
236     @SystemApi(client = MODULE_LIBRARIES)
237     public static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff;
238 
239     /**
240      * The offset of the exponent from the actual value.
241      *
242      * @hide
243      */
244     @SystemApi(client = MODULE_LIBRARIES)
245     public static final int EXPONENT_BIAS             = 15;
246 
247     private static final int FP32_SIGN_SHIFT            = 31;
248     private static final int FP32_EXPONENT_SHIFT        = 23;
249     private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff;
250     private static final int FP32_SIGNIFICAND_MASK      = 0x7fffff;
251     private static final int FP32_EXPONENT_BIAS         = 127;
252     private static final int FP32_QNAN_MASK             = 0x400000;
253     private static final int FP32_DENORMAL_MAGIC = 126 << 23;
254     private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC);
255 
256     /** Hidden constructor to prevent instantiation. */
FP16()257     private FP16() {}
258 
259     /**
260      * <p>Compares the two specified half-precision float values. The following
261      * conditions apply during the comparison:</p>
262      *
263      * <ul>
264      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
265      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
266      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
267      * {@link #NEGATIVE_ZERO}.</li>
268      * </ul>
269      *
270      * @param x The first half-precision float value to compare.
271      * @param y The second half-precision float value to compare
272      *
273      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}, a
274      *          value less than {@code 0} if {@code x} is numerically less than {@code y},
275      *          and a value greater than {@code 0} if {@code x} is numerically greater
276      *          than {@code y}
277      *
278      * @hide
279      */
280     @SystemApi(client = MODULE_LIBRARIES)
compare(short x, short y)281     public static int compare(short x, short y) {
282         if (less(x, y)) return -1;
283         if (greater(x, y)) return 1;
284 
285         // Collapse NaNs, akin to halfToIntBits(), but we want to keep
286         // (signed) short value types to preserve the ordering of -0.0
287         // and +0.0
288         short xBits = isNaN(x) ? NaN : x;
289         short yBits = isNaN(y) ? NaN : y;
290 
291         return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1));
292     }
293 
294     /**
295      * Returns the closest integral half-precision float value to the specified
296      * half-precision float value. Special values are handled in the
297      * following ways:
298      * <ul>
299      * <li>If the specified half-precision float is NaN, the result is NaN</li>
300      * <li>If the specified half-precision float is infinity (negative or positive),
301      * the result is infinity (with the same sign)</li>
302      * <li>If the specified half-precision float is zero (negative or positive),
303      * the result is zero (with the same sign)</li>
304      * </ul>
305      *
306      * @param h A half-precision float value
307      * @return The value of the specified half-precision float rounded to the nearest
308      *         half-precision float value
309      *
310      * @hide
311      */
312     @SystemApi(client = MODULE_LIBRARIES)
rint(short h)313     public static short rint(short h) {
314         int bits = h & 0xffff;
315         int abs = bits & EXPONENT_SIGNIFICAND_MASK;
316         int result = bits;
317 
318         if (abs < 0x3c00) {
319             result &= SIGN_MASK;
320             if (abs > 0x3800){
321                 result |= 0x3c00;
322             }
323         } else if (abs < 0x6400) {
324             int exp = 25 - (abs >> 10);
325             int mask = (1 << exp) - 1;
326             result += ((1 << (exp - 1)) - (~(abs >> exp) & 1));
327             result &= ~mask;
328         }
329         if (isNaN((short) result)) {
330             // if result is NaN mask with qNaN
331             // (i.e. mask the most significant mantissa bit with 1)
332             // to comply with hardware implementations (ARM64, Intel, etc).
333             result |= NaN;
334         }
335 
336         return (short) result;
337     }
338 
339     /**
340      * Returns the smallest half-precision float value toward negative infinity
341      * greater than or equal to the specified half-precision float value.
342      * Special values are handled in the following ways:
343      * <ul>
344      * <li>If the specified half-precision float is NaN, the result is NaN</li>
345      * <li>If the specified half-precision float is infinity (negative or positive),
346      * the result is infinity (with the same sign)</li>
347      * <li>If the specified half-precision float is zero (negative or positive),
348      * the result is zero (with the same sign)</li>
349      * </ul>
350      *
351      * @param h A half-precision float value
352      * @return The smallest half-precision float value toward negative infinity
353      *         greater than or equal to the specified half-precision float value
354      *
355      * @hide
356      */
357     @SystemApi(client = MODULE_LIBRARIES)
ceil(short h)358     public static short ceil(short h) {
359         int bits = h & 0xffff;
360         int abs = bits & EXPONENT_SIGNIFICAND_MASK;
361         int result = bits;
362 
363         if (abs < 0x3c00) {
364             result &= SIGN_MASK;
365             result |= 0x3c00 & -(~(bits >> 15) & (abs != 0 ? 1 : 0));
366         } else if (abs < 0x6400) {
367             abs = 25 - (abs >> 10);
368             int mask = (1 << abs) - 1;
369             result += mask & ((bits >> 15) - 1);
370             result &= ~mask;
371         }
372         if (isNaN((short) result)) {
373             // if result is NaN mask with qNaN
374             // (i.e. mask the most significant mantissa bit with 1)
375             // to comply with hardware implementations (ARM64, Intel, etc).
376             result |= NaN;
377         }
378 
379         return (short) result;
380     }
381 
382     /**
383      * Returns the largest half-precision float value toward positive infinity
384      * less than or equal to the specified half-precision float value.
385      * Special values are handled in the following ways:
386      * <ul>
387      * <li>If the specified half-precision float is NaN, the result is NaN</li>
388      * <li>If the specified half-precision float is infinity (negative or positive),
389      * the result is infinity (with the same sign)</li>
390      * <li>If the specified half-precision float is zero (negative or positive),
391      * the result is zero (with the same sign)</li>
392      * </ul>
393      *
394      * @param h A half-precision float value
395      * @return The largest half-precision float value toward positive infinity
396      *         less than or equal to the specified half-precision float value
397      *
398      * @hide
399      */
400     @SystemApi(client = MODULE_LIBRARIES)
floor(short h)401     public static short floor(short h) {
402         int bits = h & 0xffff;
403         int abs = bits & EXPONENT_SIGNIFICAND_MASK;
404         int result = bits;
405 
406         if (abs < 0x3c00) {
407             result &= SIGN_MASK;
408             result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0);
409         } else if (abs < 0x6400) {
410             abs = 25 - (abs >> 10);
411             int mask = (1 << abs) - 1;
412             result += mask & -(bits >> 15);
413             result &= ~mask;
414         }
415         if (isNaN((short) result)) {
416             // if result is NaN mask with qNaN
417             // i.e. (Mask the most significant mantissa bit with 1)
418             result |= NaN;
419         }
420 
421         return (short) result;
422     }
423 
424     /**
425      * Returns the truncated half-precision float value of the specified
426      * half-precision float value. Special values are handled in the following ways:
427      * <ul>
428      * <li>If the specified half-precision float is NaN, the result is NaN</li>
429      * <li>If the specified half-precision float is infinity (negative or positive),
430      * the result is infinity (with the same sign)</li>
431      * <li>If the specified half-precision float is zero (negative or positive),
432      * the result is zero (with the same sign)</li>
433      * </ul>
434      *
435      * @param h A half-precision float value
436      * @return The truncated half-precision float value of the specified
437      *         half-precision float value
438      *
439      * @hide
440      */
441     @SystemApi(client = MODULE_LIBRARIES)
trunc(short h)442     public static short trunc(short h) {
443         int bits = h & 0xffff;
444         int abs = bits & EXPONENT_SIGNIFICAND_MASK;
445         int result = bits;
446 
447         if (abs < 0x3c00) {
448             result &= SIGN_MASK;
449         } else if (abs < 0x6400) {
450             abs = 25 - (abs >> 10);
451             int mask = (1 << abs) - 1;
452             result &= ~mask;
453         }
454 
455         return (short) result;
456     }
457 
458     /**
459      * Returns the smaller of two half-precision float values (the value closest
460      * to negative infinity). Special values are handled in the following ways:
461      * <ul>
462      * <li>If either value is NaN, the result is NaN</li>
463      * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li>
464      * </ul>
465      *
466      * @param x The first half-precision value
467      * @param y The second half-precision value
468      * @return The smaller of the two specified half-precision values
469      *
470      * @hide
471      */
472     @SystemApi(client = MODULE_LIBRARIES)
min(short x, short y)473     public static short min(short x, short y) {
474         if (isNaN(x)) return NaN;
475         if (isNaN(y)) return NaN;
476 
477         if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) {
478             return (x & SIGN_MASK) != 0 ? x : y;
479         }
480 
481         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
482                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
483     }
484 
485     /**
486      * Returns the larger of two half-precision float values (the value closest
487      * to positive infinity). Special values are handled in the following ways:
488      * <ul>
489      * <li>If either value is NaN, the result is NaN</li>
490      * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li>
491      * </ul>
492      *
493      * @param x The first half-precision value
494      * @param y The second half-precision value
495      *
496      * @return The larger of the two specified half-precision values
497      *
498      * @hide
499      */
500     @SystemApi(client = MODULE_LIBRARIES)
max(short x, short y)501     public static short max(short x, short y) {
502         if (isNaN(x)) return NaN;
503         if (isNaN(y)) return NaN;
504 
505         if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) {
506             return (x & SIGN_MASK) != 0 ? y : x;
507         }
508 
509         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
510                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
511     }
512 
513     /**
514      * Returns true if the first half-precision float value is less (smaller
515      * toward negative infinity) than the second half-precision float value.
516      * If either of the values is NaN, the result is false.
517      *
518      * @param x The first half-precision value
519      * @param y The second half-precision value
520      *
521      * @return True if x is less than y, false otherwise
522      *
523      * @hide
524      */
525     @SystemApi(client = MODULE_LIBRARIES)
less(short x, short y)526     public static boolean less(short x, short y) {
527         if (isNaN(x)) return false;
528         if (isNaN(y)) return false;
529 
530         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
531                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
532     }
533 
534     /**
535      * Returns true if the first half-precision float value is less (smaller
536      * toward negative infinity) than or equal to the second half-precision
537      * float value. If either of the values is NaN, the result is false.
538      *
539      * @param x The first half-precision value
540      * @param y The second half-precision value
541      *
542      * @return True if x is less than or equal to y, false otherwise
543      *
544      * @hide
545      */
546     @SystemApi(client = MODULE_LIBRARIES)
lessEquals(short x, short y)547     public static boolean lessEquals(short x, short y) {
548         if (isNaN(x)) return false;
549         if (isNaN(y)) return false;
550 
551         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <=
552                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
553     }
554 
555     /**
556      * Returns true if the first half-precision float value is greater (larger
557      * toward positive infinity) than the second half-precision float value.
558      * If either of the values is NaN, the result is false.
559      *
560      * @param x The first half-precision value
561      * @param y The second half-precision value
562      *
563      * @return True if x is greater than y, false otherwise
564      *
565      * @hide
566      */
567     @SystemApi(client = MODULE_LIBRARIES)
greater(short x, short y)568     public static boolean greater(short x, short y) {
569         if (isNaN(x)) return false;
570         if (isNaN(y)) return false;
571 
572         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
573                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
574     }
575 
576     /**
577      * Returns true if the first half-precision float value is greater (larger
578      * toward positive infinity) than or equal to the second half-precision float
579      * value. If either of the values is NaN, the result is false.
580      *
581      * @param x The first half-precision value
582      * @param y The second half-precision value
583      *
584      * @return True if x is greater than y, false otherwise
585      *
586      * @hide
587      */
588     @SystemApi(client = MODULE_LIBRARIES)
greaterEquals(short x, short y)589     public static boolean greaterEquals(short x, short y) {
590         if (isNaN(x)) return false;
591         if (isNaN(y)) return false;
592 
593         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >=
594                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
595     }
596 
597     /**
598      * Returns true if the two half-precision float values are equal.
599      * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO}
600      * and {@link #NEGATIVE_ZERO} are considered equal.
601      *
602      * @param x The first half-precision value
603      * @param y The second half-precision value
604      *
605      * @return True if x is equal to y, false otherwise
606      *
607      * @hide
608      */
609     @SystemApi(client = MODULE_LIBRARIES)
equals(short x, short y)610     public static boolean equals(short x, short y) {
611         if (isNaN(x)) return false;
612         if (isNaN(y)) return false;
613 
614         return x == y || ((x | y) & EXPONENT_SIGNIFICAND_MASK) == 0;
615     }
616 
617     /**
618      * Returns true if the specified half-precision float value represents
619      * infinity, false otherwise.
620      *
621      * @param h A half-precision float value
622      * @return True if the value is positive infinity or negative infinity,
623      *         false otherwise
624      *
625      * @hide
626      */
627     @SystemApi(client = MODULE_LIBRARIES)
isInfinite(short h)628     public static boolean isInfinite(short h) {
629         return (h & EXPONENT_SIGNIFICAND_MASK) == POSITIVE_INFINITY;
630     }
631 
632     /**
633      * Returns true if the specified half-precision float value represents
634      * a Not-a-Number, false otherwise.
635      *
636      * @param h A half-precision float value
637      * @return True if the value is a NaN, false otherwise
638      *
639      * @hide
640      */
641     @SystemApi(client = MODULE_LIBRARIES)
isNaN(short h)642     public static boolean isNaN(short h) {
643         return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY;
644     }
645 
646     /**
647      * Returns true if the specified half-precision float value is normalized
648      * (does not have a subnormal representation). If the specified value is
649      * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY},
650      * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal
651      * number, this method returns false.
652      *
653      * @param h A half-precision float value
654      * @return True if the value is normalized, false otherwise
655      *
656      * @hide
657      */
658     @SystemApi(client = MODULE_LIBRARIES)
isNormalized(short h)659     public static boolean isNormalized(short h) {
660         return (h & POSITIVE_INFINITY) != 0 && (h & POSITIVE_INFINITY) != POSITIVE_INFINITY;
661     }
662 
663     /**
664      * <p>Converts the specified half-precision float value into a
665      * single-precision float value. The following special cases are handled:</p>
666      * <ul>
667      * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li>
668      * <li>If the input is {@link #POSITIVE_INFINITY} or
669      * {@link #NEGATIVE_INFINITY}, the returned value is respectively
670      * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li>
671      * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li>
672      * <li>Otherwise, the returned value is a normalized single-precision float value</li>
673      * </ul>
674      *
675      * @param h The half-precision float value to convert to single-precision
676      * @return A normalized single-precision float value
677      *
678      * @hide
679      */
680     @SystemApi(client = MODULE_LIBRARIES)
toFloat(short h)681     public static float toFloat(short h) {
682         int bits = h & 0xffff;
683         int s = bits & SIGN_MASK;
684         int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK;
685         int m = (bits                        ) & SIGNIFICAND_MASK;
686 
687         int outE = 0;
688         int outM = 0;
689 
690         if (e == 0) { // Denormal or 0
691             if (m != 0) {
692                 // Convert denorm fp16 into normalized fp32
693                 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m);
694                 o -= FP32_DENORMAL_FLOAT;
695                 return s == 0 ? o : -o;
696             }
697         } else {
698             outM = m << 13;
699             if (e == 0x1f) { // Infinite or NaN
700                 outE = 0xff;
701                 if (outM != 0) { // SNaNs are quieted
702                     outM |= FP32_QNAN_MASK;
703                 }
704             } else {
705                 outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS;
706             }
707         }
708 
709         int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM;
710         return Float.intBitsToFloat(out);
711     }
712 
713     /**
714      * <p>Converts the specified single-precision float value into a
715      * half-precision float value. The following special cases are handled:</p>
716      * <ul>
717      * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned
718      * value is {@link #NaN}</li>
719      * <li>If the input is {@link Float#POSITIVE_INFINITY} or
720      * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively
721      * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li>
722      * <li>If the input is 0 (positive or negative), the returned value is
723      * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
724      * <li>If the input is a less than {@link #MIN_VALUE}, the returned value
725      * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
726      * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value
727      * is a denorm half-precision float</li>
728      * <li>Otherwise, the returned value is rounded to the nearest
729      * representable half-precision float value</li>
730      * </ul>
731      *
732      * @param f The single-precision float value to convert to half-precision
733      * @return A half-precision float value
734      *
735      * @hide
736      */
737     @SystemApi(client = MODULE_LIBRARIES)
toHalf(float f)738     public static short toHalf(float f) {
739         int bits = Float.floatToRawIntBits(f);
740         int s = (bits >>> FP32_SIGN_SHIFT    );
741         int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK;
742         int m = (bits                        ) & FP32_SIGNIFICAND_MASK;
743 
744         int outE = 0;
745         int outM = 0;
746 
747         if (e == 0xff) { // Infinite or NaN
748             outE = 0x1f;
749             outM = m != 0 ? 0x200 : 0;
750         } else {
751             e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS;
752             if (e >= 0x1f) { // Overflow
753                 outE = 0x1f;
754             } else if (e <= 0) { // Underflow
755                 if (e < -10) {
756                     // The absolute fp32 value is less than MIN_VALUE, flush to +/-0
757                 } else {
758                     // The fp32 value is a normalized float less than MIN_NORMAL,
759                     // we convert to a denorm fp16
760                     m = m | 0x800000;
761                     int shift = 14 - e;
762                     outM = m >> shift;
763 
764                     int lowm = m & ((1 << shift) - 1);
765                     int hway = 1 << (shift - 1);
766                     // if above halfway or exactly halfway and outM is odd
767                     if (lowm + (outM & 1) > hway){
768                         // Round to nearest even
769                         // Can overflow into exponent bit, which surprisingly is OK.
770                         // This increment relies on the +outM in the return statement below
771                         outM++;
772                     }
773                 }
774             } else {
775                 outE = e;
776                 outM = m >> 13;
777                 // if above halfway or exactly halfway and outM is odd
778                 if ((m & 0x1fff) + (outM & 0x1) > 0x1000) {
779                     // Round to nearest even
780                     // Can overflow into exponent bit, which surprisingly is OK.
781                     // This increment relies on the +outM in the return statement below
782                     outM++;
783                 }
784             }
785         }
786         // The outM is added here as the +1 increments for outM above can
787         // cause an overflow in the exponent bit which is OK.
788         return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM);
789     }
790 
791     /**
792      * <p>Returns a hexadecimal string representation of the specified half-precision
793      * float value. If the value is a NaN, the result is <code>"NaN"</code>,
794      * otherwise the result follows this format:</p>
795      * <ul>
796      * <li>If the sign is positive, no sign character appears in the result</li>
797      * <li>If the sign is negative, the first character is <code>'-'</code></li>
798      * <li>If the value is inifinity, the string is <code>"Infinity"</code></li>
799      * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li>
800      * <li>If the value has a normalized representation, the exponent and
801      * significand are represented in the string in two fields. The significand
802      * starts with <code>"0x1."</code> followed by its lowercase hexadecimal
803      * representation. Trailing zeroes are removed unless all digits are 0, then
804      * a single zero is used. The significand representation is followed by the
805      * exponent, represented by <code>"p"</code>, itself followed by a decimal
806      * string of the unbiased exponent</li>
807      * <li>If the value has a subnormal representation, the significand starts
808      * with <code>"0x0."</code> followed by its lowercase hexadecimal
809      * representation. Trailing zeroes are removed unless all digits are 0, then
810      * a single zero is used. The significand representation is followed by the
811      * exponent, represented by <code>"p-14"</code></li>
812      * </ul>
813      *
814      * @param h A half-precision float value
815      * @return A hexadecimal string representation of the specified value
816      *
817      * @hide
818      */
819     @SystemApi(client = MODULE_LIBRARIES)
toHexString(short h)820     public static String toHexString(short h) {
821         StringBuilder o = new StringBuilder();
822 
823         int bits = h & 0xffff;
824         int s = (bits >>> SIGN_SHIFT    );
825         int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK;
826         int m = (bits                   ) & SIGNIFICAND_MASK;
827 
828         if (e == 0x1f) { // Infinite or NaN
829             if (m == 0) {
830                 if (s != 0) o.append('-');
831                 o.append("Infinity");
832             } else {
833                 o.append("NaN");
834             }
835         } else {
836             if (s == 1) o.append('-');
837             if (e == 0) {
838                 if (m == 0) {
839                     o.append("0x0.0p0");
840                 } else {
841                     o.append("0x0.");
842                     String significand = Integer.toHexString(m);
843                     o.append(significand.replaceFirst("0{2,}$", ""));
844                     o.append("p-14");
845                 }
846             } else {
847                 o.append("0x1.");
848                 String significand = Integer.toHexString(m);
849                 o.append(significand.replaceFirst("0{2,}$", ""));
850                 o.append('p');
851                 o.append(Integer.toString(e - EXPONENT_BIAS));
852             }
853         }
854 
855         return o.toString();
856     }
857 }
858