1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package libcore.util; 18 19 import static android.annotation.SystemApi.Client.MODULE_LIBRARIES; 20 21 import android.annotation.SystemApi; 22 23 /** 24 * <p>The {@code FP16} class is a wrapper and a utility class to manipulate half-precision 16-bit 25 * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a> 26 * floating point data types (also called fp16 or binary16). A half-precision float can be 27 * created from or converted to single-precision floats, and is stored in a short data type. 28 * 29 * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p> 30 * <ul> 31 * <li>Sign bit: 1 bit</li> 32 * <li>Exponent width: 5 bits</li> 33 * <li>Significand: 10 bits</li> 34 * </ul> 35 * 36 * <p>The format is laid out as follows:</p> 37 * <pre> 38 * 1 11111 1111111111 39 * ^ --^-- -----^---- 40 * sign | |_______ significand 41 * | 42 * -- exponent 43 * </pre> 44 * 45 * <p>Half-precision floating points can be useful to save memory and/or 46 * bandwidth at the expense of range and precision when compared to single-precision 47 * floating points (fp32).</p> 48 * <p>To help you decide whether fp16 is the right storage type for you need, please 49 * refer to the table below that shows the available precision throughout the range of 50 * possible values. The <em>precision</em> column indicates the step size between two 51 * consecutive numbers in a specific part of the range.</p> 52 * 53 * <table summary="Precision of fp16 across the range"> 54 * <tr><th>Range start</th><th>Precision</th></tr> 55 * <tr><td>0</td><td>1 ⁄ 16,777,216</td></tr> 56 * <tr><td>1 ⁄ 16,384</td><td>1 ⁄ 16,777,216</td></tr> 57 * <tr><td>1 ⁄ 8,192</td><td>1 ⁄ 8,388,608</td></tr> 58 * <tr><td>1 ⁄ 4,096</td><td>1 ⁄ 4,194,304</td></tr> 59 * <tr><td>1 ⁄ 2,048</td><td>1 ⁄ 2,097,152</td></tr> 60 * <tr><td>1 ⁄ 1,024</td><td>1 ⁄ 1,048,576</td></tr> 61 * <tr><td>1 ⁄ 512</td><td>1 ⁄ 524,288</td></tr> 62 * <tr><td>1 ⁄ 256</td><td>1 ⁄ 262,144</td></tr> 63 * <tr><td>1 ⁄ 128</td><td>1 ⁄ 131,072</td></tr> 64 * <tr><td>1 ⁄ 64</td><td>1 ⁄ 65,536</td></tr> 65 * <tr><td>1 ⁄ 32</td><td>1 ⁄ 32,768</td></tr> 66 * <tr><td>1 ⁄ 16</td><td>1 ⁄ 16,384</td></tr> 67 * <tr><td>1 ⁄ 8</td><td>1 ⁄ 8,192</td></tr> 68 * <tr><td>1 ⁄ 4</td><td>1 ⁄ 4,096</td></tr> 69 * <tr><td>1 ⁄ 2</td><td>1 ⁄ 2,048</td></tr> 70 * <tr><td>1</td><td>1 ⁄ 1,024</td></tr> 71 * <tr><td>2</td><td>1 ⁄ 512</td></tr> 72 * <tr><td>4</td><td>1 ⁄ 256</td></tr> 73 * <tr><td>8</td><td>1 ⁄ 128</td></tr> 74 * <tr><td>16</td><td>1 ⁄ 64</td></tr> 75 * <tr><td>32</td><td>1 ⁄ 32</td></tr> 76 * <tr><td>64</td><td>1 ⁄ 16</td></tr> 77 * <tr><td>128</td><td>1 ⁄ 8</td></tr> 78 * <tr><td>256</td><td>1 ⁄ 4</td></tr> 79 * <tr><td>512</td><td>1 ⁄ 2</td></tr> 80 * <tr><td>1,024</td><td>1</td></tr> 81 * <tr><td>2,048</td><td>2</td></tr> 82 * <tr><td>4,096</td><td>4</td></tr> 83 * <tr><td>8,192</td><td>8</td></tr> 84 * <tr><td>16,384</td><td>16</td></tr> 85 * <tr><td>32,768</td><td>32</td></tr> 86 * </table> 87 * 88 * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p> 89 * 90 * @hide 91 */ 92 93 @SystemApi(client = MODULE_LIBRARIES) 94 public final class FP16 { 95 /** 96 * The number of bits used to represent a half-precision float value. 97 * 98 * @hide 99 */ 100 @SystemApi(client = MODULE_LIBRARIES) 101 public static final int SIZE = 16; 102 103 /** 104 * Epsilon is the difference between 1.0 and the next value representable 105 * by a half-precision floating-point. 106 * 107 * @hide 108 */ 109 @SystemApi(client = MODULE_LIBRARIES) 110 public static final short EPSILON = (short) 0x1400; 111 112 /** 113 * Maximum exponent a finite half-precision float may have. 114 * 115 * @hide 116 */ 117 @SystemApi(client = MODULE_LIBRARIES) 118 public static final int MAX_EXPONENT = 15; 119 /** 120 * Minimum exponent a normalized half-precision float may have. 121 * 122 * @hide 123 */ 124 @SystemApi(client = MODULE_LIBRARIES) 125 public static final int MIN_EXPONENT = -14; 126 127 /** 128 * Smallest negative value a half-precision float may have. 129 * 130 * @hide 131 */ 132 @SystemApi(client = MODULE_LIBRARIES) 133 public static final short LOWEST_VALUE = (short) 0xfbff; 134 /** 135 * Maximum positive finite value a half-precision float may have. 136 * 137 * @hide 138 */ 139 @SystemApi(client = MODULE_LIBRARIES) 140 public static final short MAX_VALUE = (short) 0x7bff; 141 /** 142 * Smallest positive normal value a half-precision float may have. 143 * 144 * @hide 145 */ 146 @SystemApi(client = MODULE_LIBRARIES) 147 public static final short MIN_NORMAL = (short) 0x0400; 148 /** 149 * Smallest positive non-zero value a half-precision float may have. 150 * 151 * @hide 152 */ 153 @SystemApi(client = MODULE_LIBRARIES) 154 public static final short MIN_VALUE = (short) 0x0001; 155 /** 156 * A Not-a-Number representation of a half-precision float. 157 * 158 * @hide 159 */ 160 @SystemApi(client = MODULE_LIBRARIES) 161 public static final short NaN = (short) 0x7e00; 162 /** 163 * Negative infinity of type half-precision float. 164 * 165 * @hide 166 */ 167 @SystemApi(client = MODULE_LIBRARIES) 168 public static final short NEGATIVE_INFINITY = (short) 0xfc00; 169 /** 170 * Negative 0 of type half-precision float. 171 * 172 * @hide 173 */ 174 @SystemApi(client = MODULE_LIBRARIES) 175 public static final short NEGATIVE_ZERO = (short) 0x8000; 176 /** 177 * Positive infinity of type half-precision float. 178 * 179 * @hide 180 */ 181 @SystemApi(client = MODULE_LIBRARIES) 182 public static final short POSITIVE_INFINITY = (short) 0x7c00; 183 /** 184 * Positive 0 of type half-precision float. 185 * 186 * @hide 187 */ 188 @SystemApi(client = MODULE_LIBRARIES) 189 public static final short POSITIVE_ZERO = (short) 0x0000; 190 191 /** 192 * The offset to shift by to obtain the sign bit. 193 * 194 * @hide 195 */ 196 @SystemApi(client = MODULE_LIBRARIES) 197 public static final int SIGN_SHIFT = 15; 198 199 /** 200 * The offset to shift by to obtain the exponent bits. 201 * 202 * @hide 203 */ 204 @SystemApi(client = MODULE_LIBRARIES) 205 public static final int EXPONENT_SHIFT = 10; 206 207 /** 208 * The bitmask to AND a number with to obtain the sign bit. 209 * 210 * @hide 211 */ 212 @SystemApi(client = MODULE_LIBRARIES) 213 public static final int SIGN_MASK = 0x8000; 214 215 /** 216 * The bitmask to AND a number shifted by {@link #EXPONENT_SHIFT} right, to obtain exponent bits. 217 * 218 * @hide 219 */ 220 @SystemApi(client = MODULE_LIBRARIES) 221 public static final int SHIFTED_EXPONENT_MASK = 0x1f; 222 223 /** 224 * The bitmask to AND a number with to obtain significand bits. 225 * 226 * @hide 227 */ 228 @SystemApi(client = MODULE_LIBRARIES) 229 public static final int SIGNIFICAND_MASK = 0x3ff; 230 231 /** 232 * The bitmask to AND with to obtain exponent and significand bits. 233 * 234 * @hide 235 */ 236 @SystemApi(client = MODULE_LIBRARIES) 237 public static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff; 238 239 /** 240 * The offset of the exponent from the actual value. 241 * 242 * @hide 243 */ 244 @SystemApi(client = MODULE_LIBRARIES) 245 public static final int EXPONENT_BIAS = 15; 246 247 private static final int FP32_SIGN_SHIFT = 31; 248 private static final int FP32_EXPONENT_SHIFT = 23; 249 private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff; 250 private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; 251 private static final int FP32_EXPONENT_BIAS = 127; 252 private static final int FP32_QNAN_MASK = 0x400000; 253 private static final int FP32_DENORMAL_MAGIC = 126 << 23; 254 private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); 255 256 /** Hidden constructor to prevent instantiation. */ FP16()257 private FP16() {} 258 259 /** 260 * <p>Compares the two specified half-precision float values. The following 261 * conditions apply during the comparison:</p> 262 * 263 * <ul> 264 * <li>{@link #NaN} is considered by this method to be equal to itself and greater 265 * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li> 266 * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than 267 * {@link #NEGATIVE_ZERO}.</li> 268 * </ul> 269 * 270 * @param x The first half-precision float value to compare. 271 * @param y The second half-precision float value to compare 272 * 273 * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a 274 * value less than {@code 0} if {@code x} is numerically less than {@code y}, 275 * and a value greater than {@code 0} if {@code x} is numerically greater 276 * than {@code y} 277 * 278 * @hide 279 */ 280 @SystemApi(client = MODULE_LIBRARIES) compare(short x, short y)281 public static int compare(short x, short y) { 282 if (less(x, y)) return -1; 283 if (greater(x, y)) return 1; 284 285 // Collapse NaNs, akin to halfToIntBits(), but we want to keep 286 // (signed) short value types to preserve the ordering of -0.0 287 // and +0.0 288 short xBits = isNaN(x) ? NaN : x; 289 short yBits = isNaN(y) ? NaN : y; 290 291 return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); 292 } 293 294 /** 295 * Returns the closest integral half-precision float value to the specified 296 * half-precision float value. Special values are handled in the 297 * following ways: 298 * <ul> 299 * <li>If the specified half-precision float is NaN, the result is NaN</li> 300 * <li>If the specified half-precision float is infinity (negative or positive), 301 * the result is infinity (with the same sign)</li> 302 * <li>If the specified half-precision float is zero (negative or positive), 303 * the result is zero (with the same sign)</li> 304 * </ul> 305 * 306 * @param h A half-precision float value 307 * @return The value of the specified half-precision float rounded to the nearest 308 * half-precision float value 309 * 310 * @hide 311 */ 312 @SystemApi(client = MODULE_LIBRARIES) rint(short h)313 public static short rint(short h) { 314 int bits = h & 0xffff; 315 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 316 int result = bits; 317 318 if (abs < 0x3c00) { 319 result &= SIGN_MASK; 320 if (abs > 0x3800){ 321 result |= 0x3c00; 322 } 323 } else if (abs < 0x6400) { 324 int exp = 25 - (abs >> 10); 325 int mask = (1 << exp) - 1; 326 result += ((1 << (exp - 1)) - (~(abs >> exp) & 1)); 327 result &= ~mask; 328 } 329 if (isNaN((short) result)) { 330 // if result is NaN mask with qNaN 331 // (i.e. mask the most significant mantissa bit with 1) 332 // to comply with hardware implementations (ARM64, Intel, etc). 333 result |= NaN; 334 } 335 336 return (short) result; 337 } 338 339 /** 340 * Returns the smallest half-precision float value toward negative infinity 341 * greater than or equal to the specified half-precision float value. 342 * Special values are handled in the following ways: 343 * <ul> 344 * <li>If the specified half-precision float is NaN, the result is NaN</li> 345 * <li>If the specified half-precision float is infinity (negative or positive), 346 * the result is infinity (with the same sign)</li> 347 * <li>If the specified half-precision float is zero (negative or positive), 348 * the result is zero (with the same sign)</li> 349 * </ul> 350 * 351 * @param h A half-precision float value 352 * @return The smallest half-precision float value toward negative infinity 353 * greater than or equal to the specified half-precision float value 354 * 355 * @hide 356 */ 357 @SystemApi(client = MODULE_LIBRARIES) ceil(short h)358 public static short ceil(short h) { 359 int bits = h & 0xffff; 360 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 361 int result = bits; 362 363 if (abs < 0x3c00) { 364 result &= SIGN_MASK; 365 result |= 0x3c00 & -(~(bits >> 15) & (abs != 0 ? 1 : 0)); 366 } else if (abs < 0x6400) { 367 abs = 25 - (abs >> 10); 368 int mask = (1 << abs) - 1; 369 result += mask & ((bits >> 15) - 1); 370 result &= ~mask; 371 } 372 if (isNaN((short) result)) { 373 // if result is NaN mask with qNaN 374 // (i.e. mask the most significant mantissa bit with 1) 375 // to comply with hardware implementations (ARM64, Intel, etc). 376 result |= NaN; 377 } 378 379 return (short) result; 380 } 381 382 /** 383 * Returns the largest half-precision float value toward positive infinity 384 * less than or equal to the specified half-precision float value. 385 * Special values are handled in the following ways: 386 * <ul> 387 * <li>If the specified half-precision float is NaN, the result is NaN</li> 388 * <li>If the specified half-precision float is infinity (negative or positive), 389 * the result is infinity (with the same sign)</li> 390 * <li>If the specified half-precision float is zero (negative or positive), 391 * the result is zero (with the same sign)</li> 392 * </ul> 393 * 394 * @param h A half-precision float value 395 * @return The largest half-precision float value toward positive infinity 396 * less than or equal to the specified half-precision float value 397 * 398 * @hide 399 */ 400 @SystemApi(client = MODULE_LIBRARIES) floor(short h)401 public static short floor(short h) { 402 int bits = h & 0xffff; 403 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 404 int result = bits; 405 406 if (abs < 0x3c00) { 407 result &= SIGN_MASK; 408 result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0); 409 } else if (abs < 0x6400) { 410 abs = 25 - (abs >> 10); 411 int mask = (1 << abs) - 1; 412 result += mask & -(bits >> 15); 413 result &= ~mask; 414 } 415 if (isNaN((short) result)) { 416 // if result is NaN mask with qNaN 417 // i.e. (Mask the most significant mantissa bit with 1) 418 result |= NaN; 419 } 420 421 return (short) result; 422 } 423 424 /** 425 * Returns the truncated half-precision float value of the specified 426 * half-precision float value. Special values are handled in the following ways: 427 * <ul> 428 * <li>If the specified half-precision float is NaN, the result is NaN</li> 429 * <li>If the specified half-precision float is infinity (negative or positive), 430 * the result is infinity (with the same sign)</li> 431 * <li>If the specified half-precision float is zero (negative or positive), 432 * the result is zero (with the same sign)</li> 433 * </ul> 434 * 435 * @param h A half-precision float value 436 * @return The truncated half-precision float value of the specified 437 * half-precision float value 438 * 439 * @hide 440 */ 441 @SystemApi(client = MODULE_LIBRARIES) trunc(short h)442 public static short trunc(short h) { 443 int bits = h & 0xffff; 444 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 445 int result = bits; 446 447 if (abs < 0x3c00) { 448 result &= SIGN_MASK; 449 } else if (abs < 0x6400) { 450 abs = 25 - (abs >> 10); 451 int mask = (1 << abs) - 1; 452 result &= ~mask; 453 } 454 455 return (short) result; 456 } 457 458 /** 459 * Returns the smaller of two half-precision float values (the value closest 460 * to negative infinity). Special values are handled in the following ways: 461 * <ul> 462 * <li>If either value is NaN, the result is NaN</li> 463 * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li> 464 * </ul> 465 * 466 * @param x The first half-precision value 467 * @param y The second half-precision value 468 * @return The smaller of the two specified half-precision values 469 * 470 * @hide 471 */ 472 @SystemApi(client = MODULE_LIBRARIES) min(short x, short y)473 public static short min(short x, short y) { 474 if (isNaN(x)) return NaN; 475 if (isNaN(y)) return NaN; 476 477 if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) { 478 return (x & SIGN_MASK) != 0 ? x : y; 479 } 480 481 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 482 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; 483 } 484 485 /** 486 * Returns the larger of two half-precision float values (the value closest 487 * to positive infinity). Special values are handled in the following ways: 488 * <ul> 489 * <li>If either value is NaN, the result is NaN</li> 490 * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li> 491 * </ul> 492 * 493 * @param x The first half-precision value 494 * @param y The second half-precision value 495 * 496 * @return The larger of the two specified half-precision values 497 * 498 * @hide 499 */ 500 @SystemApi(client = MODULE_LIBRARIES) max(short x, short y)501 public static short max(short x, short y) { 502 if (isNaN(x)) return NaN; 503 if (isNaN(y)) return NaN; 504 505 if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) { 506 return (x & SIGN_MASK) != 0 ? y : x; 507 } 508 509 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 510 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; 511 } 512 513 /** 514 * Returns true if the first half-precision float value is less (smaller 515 * toward negative infinity) than the second half-precision float value. 516 * If either of the values is NaN, the result is false. 517 * 518 * @param x The first half-precision value 519 * @param y The second half-precision value 520 * 521 * @return True if x is less than y, false otherwise 522 * 523 * @hide 524 */ 525 @SystemApi(client = MODULE_LIBRARIES) less(short x, short y)526 public static boolean less(short x, short y) { 527 if (isNaN(x)) return false; 528 if (isNaN(y)) return false; 529 530 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 531 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 532 } 533 534 /** 535 * Returns true if the first half-precision float value is less (smaller 536 * toward negative infinity) than or equal to the second half-precision 537 * float value. If either of the values is NaN, the result is false. 538 * 539 * @param x The first half-precision value 540 * @param y The second half-precision value 541 * 542 * @return True if x is less than or equal to y, false otherwise 543 * 544 * @hide 545 */ 546 @SystemApi(client = MODULE_LIBRARIES) lessEquals(short x, short y)547 public static boolean lessEquals(short x, short y) { 548 if (isNaN(x)) return false; 549 if (isNaN(y)) return false; 550 551 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <= 552 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 553 } 554 555 /** 556 * Returns true if the first half-precision float value is greater (larger 557 * toward positive infinity) than the second half-precision float value. 558 * If either of the values is NaN, the result is false. 559 * 560 * @param x The first half-precision value 561 * @param y The second half-precision value 562 * 563 * @return True if x is greater than y, false otherwise 564 * 565 * @hide 566 */ 567 @SystemApi(client = MODULE_LIBRARIES) greater(short x, short y)568 public static boolean greater(short x, short y) { 569 if (isNaN(x)) return false; 570 if (isNaN(y)) return false; 571 572 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 573 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 574 } 575 576 /** 577 * Returns true if the first half-precision float value is greater (larger 578 * toward positive infinity) than or equal to the second half-precision float 579 * value. If either of the values is NaN, the result is false. 580 * 581 * @param x The first half-precision value 582 * @param y The second half-precision value 583 * 584 * @return True if x is greater than y, false otherwise 585 * 586 * @hide 587 */ 588 @SystemApi(client = MODULE_LIBRARIES) greaterEquals(short x, short y)589 public static boolean greaterEquals(short x, short y) { 590 if (isNaN(x)) return false; 591 if (isNaN(y)) return false; 592 593 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >= 594 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 595 } 596 597 /** 598 * Returns true if the two half-precision float values are equal. 599 * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO} 600 * and {@link #NEGATIVE_ZERO} are considered equal. 601 * 602 * @param x The first half-precision value 603 * @param y The second half-precision value 604 * 605 * @return True if x is equal to y, false otherwise 606 * 607 * @hide 608 */ 609 @SystemApi(client = MODULE_LIBRARIES) equals(short x, short y)610 public static boolean equals(short x, short y) { 611 if (isNaN(x)) return false; 612 if (isNaN(y)) return false; 613 614 return x == y || ((x | y) & EXPONENT_SIGNIFICAND_MASK) == 0; 615 } 616 617 /** 618 * Returns true if the specified half-precision float value represents 619 * infinity, false otherwise. 620 * 621 * @param h A half-precision float value 622 * @return True if the value is positive infinity or negative infinity, 623 * false otherwise 624 * 625 * @hide 626 */ 627 @SystemApi(client = MODULE_LIBRARIES) isInfinite(short h)628 public static boolean isInfinite(short h) { 629 return (h & EXPONENT_SIGNIFICAND_MASK) == POSITIVE_INFINITY; 630 } 631 632 /** 633 * Returns true if the specified half-precision float value represents 634 * a Not-a-Number, false otherwise. 635 * 636 * @param h A half-precision float value 637 * @return True if the value is a NaN, false otherwise 638 * 639 * @hide 640 */ 641 @SystemApi(client = MODULE_LIBRARIES) isNaN(short h)642 public static boolean isNaN(short h) { 643 return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY; 644 } 645 646 /** 647 * Returns true if the specified half-precision float value is normalized 648 * (does not have a subnormal representation). If the specified value is 649 * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY}, 650 * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal 651 * number, this method returns false. 652 * 653 * @param h A half-precision float value 654 * @return True if the value is normalized, false otherwise 655 * 656 * @hide 657 */ 658 @SystemApi(client = MODULE_LIBRARIES) isNormalized(short h)659 public static boolean isNormalized(short h) { 660 return (h & POSITIVE_INFINITY) != 0 && (h & POSITIVE_INFINITY) != POSITIVE_INFINITY; 661 } 662 663 /** 664 * <p>Converts the specified half-precision float value into a 665 * single-precision float value. The following special cases are handled:</p> 666 * <ul> 667 * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li> 668 * <li>If the input is {@link #POSITIVE_INFINITY} or 669 * {@link #NEGATIVE_INFINITY}, the returned value is respectively 670 * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li> 671 * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li> 672 * <li>Otherwise, the returned value is a normalized single-precision float value</li> 673 * </ul> 674 * 675 * @param h The half-precision float value to convert to single-precision 676 * @return A normalized single-precision float value 677 * 678 * @hide 679 */ 680 @SystemApi(client = MODULE_LIBRARIES) toFloat(short h)681 public static float toFloat(short h) { 682 int bits = h & 0xffff; 683 int s = bits & SIGN_MASK; 684 int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; 685 int m = (bits ) & SIGNIFICAND_MASK; 686 687 int outE = 0; 688 int outM = 0; 689 690 if (e == 0) { // Denormal or 0 691 if (m != 0) { 692 // Convert denorm fp16 into normalized fp32 693 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m); 694 o -= FP32_DENORMAL_FLOAT; 695 return s == 0 ? o : -o; 696 } 697 } else { 698 outM = m << 13; 699 if (e == 0x1f) { // Infinite or NaN 700 outE = 0xff; 701 if (outM != 0) { // SNaNs are quieted 702 outM |= FP32_QNAN_MASK; 703 } 704 } else { 705 outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS; 706 } 707 } 708 709 int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM; 710 return Float.intBitsToFloat(out); 711 } 712 713 /** 714 * <p>Converts the specified single-precision float value into a 715 * half-precision float value. The following special cases are handled:</p> 716 * <ul> 717 * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned 718 * value is {@link #NaN}</li> 719 * <li>If the input is {@link Float#POSITIVE_INFINITY} or 720 * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively 721 * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li> 722 * <li>If the input is 0 (positive or negative), the returned value is 723 * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 724 * <li>If the input is a less than {@link #MIN_VALUE}, the returned value 725 * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 726 * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value 727 * is a denorm half-precision float</li> 728 * <li>Otherwise, the returned value is rounded to the nearest 729 * representable half-precision float value</li> 730 * </ul> 731 * 732 * @param f The single-precision float value to convert to half-precision 733 * @return A half-precision float value 734 * 735 * @hide 736 */ 737 @SystemApi(client = MODULE_LIBRARIES) toHalf(float f)738 public static short toHalf(float f) { 739 int bits = Float.floatToRawIntBits(f); 740 int s = (bits >>> FP32_SIGN_SHIFT ); 741 int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK; 742 int m = (bits ) & FP32_SIGNIFICAND_MASK; 743 744 int outE = 0; 745 int outM = 0; 746 747 if (e == 0xff) { // Infinite or NaN 748 outE = 0x1f; 749 outM = m != 0 ? 0x200 : 0; 750 } else { 751 e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS; 752 if (e >= 0x1f) { // Overflow 753 outE = 0x1f; 754 } else if (e <= 0) { // Underflow 755 if (e < -10) { 756 // The absolute fp32 value is less than MIN_VALUE, flush to +/-0 757 } else { 758 // The fp32 value is a normalized float less than MIN_NORMAL, 759 // we convert to a denorm fp16 760 m = m | 0x800000; 761 int shift = 14 - e; 762 outM = m >> shift; 763 764 int lowm = m & ((1 << shift) - 1); 765 int hway = 1 << (shift - 1); 766 // if above halfway or exactly halfway and outM is odd 767 if (lowm + (outM & 1) > hway){ 768 // Round to nearest even 769 // Can overflow into exponent bit, which surprisingly is OK. 770 // This increment relies on the +outM in the return statement below 771 outM++; 772 } 773 } 774 } else { 775 outE = e; 776 outM = m >> 13; 777 // if above halfway or exactly halfway and outM is odd 778 if ((m & 0x1fff) + (outM & 0x1) > 0x1000) { 779 // Round to nearest even 780 // Can overflow into exponent bit, which surprisingly is OK. 781 // This increment relies on the +outM in the return statement below 782 outM++; 783 } 784 } 785 } 786 // The outM is added here as the +1 increments for outM above can 787 // cause an overflow in the exponent bit which is OK. 788 return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM); 789 } 790 791 /** 792 * <p>Returns a hexadecimal string representation of the specified half-precision 793 * float value. If the value is a NaN, the result is <code>"NaN"</code>, 794 * otherwise the result follows this format:</p> 795 * <ul> 796 * <li>If the sign is positive, no sign character appears in the result</li> 797 * <li>If the sign is negative, the first character is <code>'-'</code></li> 798 * <li>If the value is inifinity, the string is <code>"Infinity"</code></li> 799 * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li> 800 * <li>If the value has a normalized representation, the exponent and 801 * significand are represented in the string in two fields. The significand 802 * starts with <code>"0x1."</code> followed by its lowercase hexadecimal 803 * representation. Trailing zeroes are removed unless all digits are 0, then 804 * a single zero is used. The significand representation is followed by the 805 * exponent, represented by <code>"p"</code>, itself followed by a decimal 806 * string of the unbiased exponent</li> 807 * <li>If the value has a subnormal representation, the significand starts 808 * with <code>"0x0."</code> followed by its lowercase hexadecimal 809 * representation. Trailing zeroes are removed unless all digits are 0, then 810 * a single zero is used. The significand representation is followed by the 811 * exponent, represented by <code>"p-14"</code></li> 812 * </ul> 813 * 814 * @param h A half-precision float value 815 * @return A hexadecimal string representation of the specified value 816 * 817 * @hide 818 */ 819 @SystemApi(client = MODULE_LIBRARIES) toHexString(short h)820 public static String toHexString(short h) { 821 StringBuilder o = new StringBuilder(); 822 823 int bits = h & 0xffff; 824 int s = (bits >>> SIGN_SHIFT ); 825 int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; 826 int m = (bits ) & SIGNIFICAND_MASK; 827 828 if (e == 0x1f) { // Infinite or NaN 829 if (m == 0) { 830 if (s != 0) o.append('-'); 831 o.append("Infinity"); 832 } else { 833 o.append("NaN"); 834 } 835 } else { 836 if (s == 1) o.append('-'); 837 if (e == 0) { 838 if (m == 0) { 839 o.append("0x0.0p0"); 840 } else { 841 o.append("0x0."); 842 String significand = Integer.toHexString(m); 843 o.append(significand.replaceFirst("0{2,}$", "")); 844 o.append("p-14"); 845 } 846 } else { 847 o.append("0x1."); 848 String significand = Integer.toHexString(m); 849 o.append(significand.replaceFirst("0{2,}$", "")); 850 o.append('p'); 851 o.append(Integer.toString(e - EXPONENT_BIAS)); 852 } 853 } 854 855 return o.toString(); 856 } 857 } 858