1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "floatRt.h"
18 #include <stdbool.h>
19 
20 
21 #include <stdio.h>
22 
23 /*
24  * FLOAT:
25  *  seeeeeee emmmmmmm mmmmmmmm mmmmmmmm
26  *
27  *  s = negative
28  *  e = exponent
29  *  m = mantissa (with one bit removed)
30  *
31  *   if (e == 0xFF)
 *      if (m)  val = nan
 *      else    val = inf
34  *      goto valDone
35  *   else if (e == 0x00)
36  *      useLeadingOne = 0
37  *      e = -126
38  *   else
39  *      e = e - 127
40  *      useLeadingOne = 1
41  *
 *   val = ((useLeadingOne << 23) + m) / (2 ^ 23)
43  *   val *= 2 ^ e
44  *
45  * valDone:
46  *
47  *   if (s)
48  *      val = -val;
49  */
50 
//mask of the sign bit (the 's' bit in the layout above) within the raw 32-bit word
#define BIT_SIGN        0x80000000UL
//number of mantissa bits actually stored in the word (the implied leading one is not stored)
#define MANTISSA_BITS   23
//the exponent field sits directly above the mantissa, so this is how far it must be shifted
#define EXP_SHIFT       MANTISSA_BITS
//bias that is subtracted from the stored exponent to get the real power of two
#define EXP_ADJUST      127
55 
56 
57 #ifdef USE_NANOHUB_FLOAT_RUNTIME
58 
floatToUint64(float f)59 uint64_t floatToUint64(float f)
60 {
61     uint32_t e, word = *(const uint32_t*)&f;
62     uint64_t ret;
63 
64 
65     //all negatives become zero
66     if (word & BIT_SIGN)
67         return 0;
68 
69     //all values with exponent < 0 are less than one and thus become zero
70     if (word < (EXP_ADJUST << EXP_SHIFT))
71         return 0;
72 
73     //standard does not say what happens to NaNs, infs & other too-large values, we return a large value as an approximation (though a zero would be equally valid)
74     if (word >= (EXP_ADJUST + 64) << EXP_SHIFT)
75         return 0xFFFFFFFFFFFFFFFFULL;
76 
77     //get mantissa and the implied leading one
78     ret = (word & ((1 << MANTISSA_BITS) - 1)) | (1 << MANTISSA_BITS);
79     e = ((word >> EXP_SHIFT) - EXP_ADJUST);
80 
81     //shift it by the exp
82     if (e < MANTISSA_BITS)
83         ret >>= MANTISSA_BITS - e;
84     else
85         ret <<= e - MANTISSA_BITS;
86 
87     return ret;
88 }
89 
floatToInt64(float f)90 int64_t floatToInt64(float f)
91 {
92     uint32_t e, word = *(const uint32_t*)&f;
93     bool neg = (word & BIT_SIGN);
94     uint64_t ret;
95 
96 
97     //all negatives become positive for now
98     word &=~ BIT_SIGN;
99 
100     //all values with exponent < 0 are less than one and thus become zero
101     if (word < (EXP_ADJUST << EXP_SHIFT))
102         return 0;
103 
104     //standard does not say what happens to NaNs, infs & other too-large values, we return a large value as an approximation (though a zero would be equally valid)
105     if (word >= (EXP_ADJUST + 63) << EXP_SHIFT)
106         ret = 0x7FFFFFFFFFFFFFFFULL;
107 
108     else {
109         //get mantissa and the implied leading one
110         ret = (word & ((1 << MANTISSA_BITS) - 1)) | (1 << MANTISSA_BITS);
111         e = ((word >> EXP_SHIFT) - EXP_ADJUST);
112 
113         //shift it by the exp
114         if (e < MANTISSA_BITS)
115             ret >>= MANTISSA_BITS - e;
116         else
117             ret <<= e - MANTISSA_BITS;
118     }
119 
120     if (neg)
121         ret = -ret;
122 
123     return ret;
124 }
125 
/*
 * floatFromUint64() - convert an unsigned 64-bit integer to the nearest float.
 *
 *  v: value to convert
 *
 * Converting the two 32-bit halves separately and adding them rounds up to
 * three times, which can land on the wrong float. Instead the value is
 * squeezed into 32 bits first, so that the single uint32_t -> float
 * conversion is the only step that rounds.
 */
float floatFromUint64(uint64_t v)
{
    float scale = 1.0f;

    if (!(v >> 32)) //this is very fast for cases where we fit into a uint32_t
        return (float)(uint32_t)v;

    //drop six low bits at a time, folding "was anything non-zero dropped" into bit 0.
    //v is at least 33 bits wide on entry to each pass, so at least 27 bits remain afterwards and
    //bit 0 stays below the 24 bits a float keeps plus the bit that decides the rounding direction.
    //It can therefore only break ties, exactly as the dropped bits would have. At most six passes.
    do {
        v = (v >> 6) | ((v & 0x3F) != 0);
        scale *= 64.0f;
    } while (v >> 32);

    //scale is a power of two, so this multiplication is exact
    return (float)(uint32_t)v * scale;
}
136 
/*
 * floatFromInt64() - convert a signed 64-bit integer to float.
 *
 *  v: value to convert
 *
 * Values that fit into an int32_t are converted directly. Anything else is
 * converted by handing its magnitude to floatFromUint64() and putting the
 * sign back afterwards.
 */
float floatFromInt64(int64_t v)
{
    uint32_t hi = ((uint64_t)v) >> 32, lo = (uint32_t)v;

    //v fits an int32_t when the high word is nothing but copies of the low word's sign bit.
    //this complex test is a lot faster than the simpler ((v >> 33) == -1 || (v >> 33) == 0)
    if ((hi == 0x00000000 && !(lo >> 31)) || (hi == 0xffffffff && (lo >> 31)))
        return (float)(int32_t)lo;

    //negate as unsigned: -v on the int64_t itself overflows for 0x8000000000000000,
    //while the unsigned negation wraps and yields its magnitude (2^63)
    if (hi >> 31)
        return -floatFromUint64(-(uint64_t)v);

    return floatFromUint64(v);
}
148 
149 
150 
151 
152 
153 #endif // USE_NANOHUB_FLOAT_RUNTIME
154