1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.renderscript.cts; 18 19 import android.renderscript.RSRuntimeException; 20 import android.util.Log; 21 22 import junit.framework.Assert; 23 24 /** This class contains utility functions needed by RenderScript CTS tests to handle Float16 25 * operations. 26 */ 27 class Float16Utils { 28 // 16-bit masks for extracting sign, exponent and mantissa bits 29 private static short SIGN_MASK = (short) 0x8000; 30 private static short EXPONENT_MASK = (short) 0x7C00; 31 private static short MANTISSA_MASK = (short) 0x03FF; 32 33 private static long DOUBLE_SIGN_MASK = 0x8000000000000000L; 34 private static long DOUBLE_EXPONENT_MASK = 0x7ff0000000000000L; 35 private static long DOUBLE_MANTISSA_MASK = 0x000fffffffffffffL; 36 37 static double MIN_NORMAL = Math.scalb(1.0, -14); // smallest Float16 normal is 2 ^ -14 38 static double MIN_VALUE = Math.scalb(1.0, -24); // smallest Float16 value is 2 ^ -24 39 static double MAX_VALUE = 65504; // largest Float16 value is 2^16 - 32 40 41 // NaN has all exponent bits set to 1 and a non-zero mantissa isFloat16NaN(short val)42 static boolean isFloat16NaN(short val) { 43 return (val & EXPONENT_MASK) == EXPONENT_MASK && 44 (val & MANTISSA_MASK) != 0; 45 } 46 47 // Infinity has all exponent bits set to 1 and zeroes in mantissa isFloat16Infinite(short val)48 static boolean isFloat16Infinite(short val) { 49 return (val & EXPONENT_MASK) == EXPONENT_MASK && 50 (val & MANTISSA_MASK) == 0; 51 } 52 53 // Subnormal numbers have exponent bits set to 0 and a non-zero mantissa isFloat16SubNormal(short val)54 static boolean isFloat16SubNormal(short val) { 55 return (val & EXPONENT_MASK) == 0 && (val & MANTISSA_MASK) != 0; 56 } 57 58 // Zero has all but the sign bit set to zero isFloat16Zero(short val)59 static boolean isFloat16Zero(short val) { 60 return (val & ~SIGN_MASK) == 0; 61 } 62 63 // Negativity test checks the sign bit isFloat16Negative(short val)64 static boolean isFloat16Negative(short val) { 65 return (val & SIGN_MASK) != 0; 66 } 67 68 // Check if this is a finite, non-zero FP16 value isFloat16FiniteNonZero(short val)69 static boolean isFloat16FiniteNonZero(short val) { 70 return !isFloat16NaN(val) && !isFloat16Infinite(val) && !isFloat16Zero(val); 71 } 72 convertFloat16ToFloat(short val)73 static float convertFloat16ToFloat(short val) { 74 // Extract sign, exponent and mantissa 75 int sign = val & SIGN_MASK; 76 int exponent = (val & EXPONENT_MASK) >> 10; 77 int mantissa = val & MANTISSA_MASK; 78 79 // 0.<mantissa> = <mantissa> * 2^-10 80 float mantissaAsFloat = Math.scalb(mantissa, -10); 81 82 float result; 83 if (isFloat16Zero(val)) 84 result = 0.0f; 85 else if (isFloat16Infinite(val)) 86 result = java.lang.Float.POSITIVE_INFINITY; 87 else if (isFloat16NaN(val)) 88 result = java.lang.Float.NaN; 89 else if (isFloat16SubNormal(val)) { 90 // value is 2^-14 * mantissaAsFloat 91 result = Math.scalb(1, -14) * mantissaAsFloat; 92 } 93 else { 94 // value is 2^(exponent - 15) * 1.<mantissa> 95 result = Math.scalb(1, exponent - 15) * (1 + mantissaAsFloat); 96 } 97 98 if (sign != 0) 99 result = -result; 100 return result; 101 } 102 convertFloat16ToDouble(short val)103 static double convertFloat16ToDouble(short val) { 104 return (double) convertFloat16ToFloat(val); 105 } 106 107 /* This utility function accepts the mantissa, exponent and an isNegative flag and constructs a 108 * double value. The exponent should be biased, but not shifted left by 52-bits. 109 */ constructDouble(long mantissa, long exponent, boolean isNegative)110 private static double constructDouble(long mantissa, long exponent, boolean isNegative) { 111 exponent = exponent << 52; 112 long bits = (exponent & DOUBLE_EXPONENT_MASK) | (mantissa & DOUBLE_MANTISSA_MASK); 113 if (isNegative) bits |= DOUBLE_SIGN_MASK; 114 return Double.longBitsToDouble(bits); 115 } 116 117 /* This function takes a double value and returns an array with the double representations of 118 * the Float16 values immediately smaller and larger than the input. If the input value is 119 * precisely representable in Float16, it is copied into both the entries of the array. 120 * 121 * The returned values can be subnormal Float16 numbers. Handling subnormals is delegated to 122 * the caller. 123 * 124 * TODO Extend this function to handle rounding for both float16 and float32. 125 */ roundToFloat16(double value)126 static double[] roundToFloat16(double value) { 127 long valueBits = Double.doubleToLongBits(value); 128 long mantissa = valueBits & DOUBLE_MANTISSA_MASK; // 52-bit mantissa 129 long exponent = valueBits & DOUBLE_EXPONENT_MASK; // 11-bit exponent 130 long unbiasedExponent = (exponent >> 52) - 1023; 131 boolean isNegative = (valueBits & DOUBLE_SIGN_MASK) != 0; 132 133 double[] result = new double[2]; 134 if (Double.isNaN(value) || Double.isInfinite(value)) { 135 // Input is NaN or Infinity. Return unchanged. 136 result[0] = value; 137 result[1] = value; 138 return result; // Note that we skip the negation at the end of this function 139 } 140 141 if (unbiasedExponent == -1023 && mantissa == 0) { 142 // Zero. Assign 0 and adjust sign at the end of this function 143 result[0] = 0.; 144 result[1] = 0.; 145 } 146 else if (unbiasedExponent < -24) { 147 // Absolute value is between 0 and MIN_VALUE. Return 0 and MIN_VALUE 148 result[0] = 0.; 149 result[1] = MIN_VALUE; 150 } 151 else if (unbiasedExponent <= 15) { 152 /* 153 * Either subnormal or normal. We compute a mask for the excess precision bits in the 154 * mantissa. 155 * 156 * (a) If none of these bits are set, the current value's mantissa and exponent are used 157 * for both the low and high values. 158 * (b) If some of these bits are set, we zero-out the extra bits to get the mantissa and 159 * exponent of the lower value. For the higher value, we increment the masked mantissa 160 * at the least-significant bit within the range of this Float16 value. To handle 161 * overflows during the the increment, we need to increment the exponent and round up to 162 * infinity if needed. 163 */ 164 165 // 'mask' is used to detect and zero-out excess bits set. 'mask + 1' is the value 166 // added to zero-ed out mantissa to get the next higher Float16 value. 167 long mask; 168 long maxSigMantissaBits; 169 170 if (unbiasedExponent < -14) { 171 // Subnormal Float16. For Float16's MIN_VALUE, mantissa can have no bits set (after 172 // adjusting for the implied one bit. For each higher exponent, an extra bit of 173 // precision is allowed in the mantissa. This computes to "24 + unbiasedExponent". 174 maxSigMantissaBits = 24 + unbiasedExponent; 175 } else { 176 // For normal Float16 values have 10 bits of precision in the mantissa. 177 maxSigMantissaBits = 10; 178 } 179 mask = DOUBLE_MANTISSA_MASK >> maxSigMantissaBits; 180 181 // zero-out the excess precision bits for the mantissa for both low and high values. 182 long lowFloat16Mantissa = mantissa & ~mask; 183 long highFloat16Mantissa = mantissa & ~mask; 184 185 long lowFloat16Exponent = unbiasedExponent; 186 long highFloat16Exponent = unbiasedExponent; 187 188 if ((mantissa & mask) != 0) { 189 // If mantissa has extra bits set, increment the mantissa at the LSB (for this 190 // Float16 value) 191 highFloat16Mantissa += mask + 1; 192 193 // If this overflows the mantissa into the exponent, set mantissa to zero and 194 // increment the exponent. 195 if ((highFloat16Mantissa & DOUBLE_EXPONENT_MASK) != 0) { 196 highFloat16Mantissa = 0; 197 highFloat16Exponent += 1; 198 } 199 200 // If the exponent exceeds the range of Float16 exponents, set it to 1024, so the 201 // value gets rounded up to Double.POSITIVE_INFINITY. 202 if (highFloat16Exponent == 16) { 203 highFloat16Exponent = 1024; 204 } 205 } 206 207 result[0] = constructDouble(lowFloat16Mantissa, lowFloat16Exponent + 1023, false); 208 result[1] = constructDouble(highFloat16Mantissa, highFloat16Exponent + 1023, false); 209 } else { 210 // Exponent is outside Float16's range. Use POSITIVE_INFINITY for both bounds. 211 result[0] = Double.POSITIVE_INFINITY; 212 result[1] = Double.POSITIVE_INFINITY; 213 } 214 215 // Swap values in result and negate them if the input value is negative. 216 if (isNegative) { 217 double tmp = result[0]; 218 result[0] = -result[1]; 219 result[1] = -tmp; 220 } 221 222 return result; 223 } 224 225 // This function takes a double value and returns 1 ulp, in Float16 precision, of that value. 226 // Both the parameter and return value have 'double' type but they should be exactly 227 // representable in Float16. If the parameter exceeds the precision of Float16, an exception is 228 // thrown. float16Ulp(double value)229 static double float16Ulp(double value) { 230 long valueBits = Double.doubleToLongBits(value); 231 long mantissa = valueBits & DOUBLE_MANTISSA_MASK; // 52-bit mantissa 232 long exponent = valueBits & DOUBLE_EXPONENT_MASK; // 11-bit exponent 233 long unbiasedExponent = (exponent >> 52) - 1023; 234 235 if (unbiasedExponent == 1024) { // i.e. NaN or infinity 236 if (mantissa == 0) { 237 return Double.POSITIVE_INFINITY; // ulp of +/- infinity is +infinity 238 } else { 239 return Double.NaN; // ulp for NaN is NaN 240 } 241 } 242 243 if (unbiasedExponent == -1023) { 244 // assert that mantissa is zero, i.e. value is zero and not a subnormal value. 245 if (mantissa != 0) { 246 throw new RSRuntimeException("float16ulp: Double parameter is subnormal"); 247 } 248 return MIN_VALUE; 249 } 250 251 if (unbiasedExponent < -24 || unbiasedExponent > 15) { 252 throw new RSRuntimeException("float16Ulp: Double parameter's exponent out of range"); 253 } 254 255 if (unbiasedExponent >= -24 && unbiasedExponent < -14) { 256 // Exponent within the range of Float16 subnormals. 257 258 // Ensure that mantissa doesn't have too much precision. For example, the smallest 259 // normal number has an unbiased exponent of -24 and has one bit in mantissa. Each 260 // higher exponent allows one extra bit of precision in the mantissa. Combined with the 261 // implied one bit, the mantissa can have "24 + unbiasedExponent" significant bits. The 262 // rest of the 52 bits in mantissa must be zero. 263 264 long maxSigMantissaBits = 24 + unbiasedExponent; 265 long mask = DOUBLE_MANTISSA_MASK >> maxSigMantissaBits; 266 267 if((mask & mantissa) != 0) { 268 throw new RSRuntimeException("float16ulp: Double parameter is too precise for subnormal Float16 values."); 269 } 270 return MIN_VALUE; 271 } 272 if (unbiasedExponent >= -14) { 273 // Exponent within the range of Float16 normals. Ensure that the mantissa has at most 274 // 10 significant bits. 275 long mask = DOUBLE_MANTISSA_MASK >> 10; 276 if ((mantissa & mask) != 0) { 277 throw new RSRuntimeException("float16ulp: Double parameter is too precise for normal Float16 values."); 278 } 279 return Math.scalb(1.0, (int) (unbiasedExponent - 10)); 280 } 281 throw new RSRuntimeException("float16Ulp: unreachable line executed"); 282 } 283 284 // This function converts its double input value to its Float16 representation (represented as a 285 // short). It assumes, but does not check, that the input is precisely representable in Float16 286 // precision. No rounding is performed either. convertDoubleToFloat16(double value)287 static short convertDoubleToFloat16(double value) { 288 if (value == 0.) { 289 if (Double.doubleToLongBits(value) == 0) 290 return (short) 0x0; 291 else 292 return (short) 0x8000; 293 } else if (Double.isNaN(value)) { 294 // return Quiet NaN irrespective of what kind of NaN 'value' is. 295 return (short) 0x7e00; 296 } else if (value == Double.POSITIVE_INFINITY) { 297 return (short) 0x7c00; 298 } else if (value == Double.NEGATIVE_INFINITY) { 299 return (short) 0xfc00; 300 } 301 302 double positiveValue = Math.abs(value); 303 boolean isNegative = (value < 0.); 304 if (positiveValue < MIN_NORMAL) { 305 short quotient = (short) (positiveValue / MIN_VALUE); 306 return (isNegative) ? (short) (0x8000 | quotient) : quotient; 307 } else { 308 long valueBits = Double.doubleToLongBits(value); 309 long mantissa = valueBits & DOUBLE_MANTISSA_MASK; // 52-bit mantissa 310 long exponent = valueBits & DOUBLE_EXPONENT_MASK; // 11-bit exponent 311 long unbiasedExponent = (exponent >> 52) - 1023; 312 313 short halfExponent = (short) ((unbiasedExponent + 15) << 10); 314 short halfMantissa = (short) (mantissa >> 42); 315 short halfValue = (short) (halfExponent | halfMantissa); 316 return (isNegative) ? (short) (0x8000 | halfValue) : halfValue; 317 } 318 } 319 320 } 321