1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef sw_Half_hpp
16 #define sw_Half_hpp
17 
#include <algorithm>
#include <cmath>
#include <cstring>
20 
namespace sw
{
	// Half-precision (16-bit) floating-point value, held as a raw bit pattern.
	//
	// The default constructor is trivial and leaves the bits uninitialized; this
	// is what allows a half to be a member of the union used by shortAsHalf().
	// The converting constructor, the conversion operator and the assignment
	// operators are only declared here; they are defined outside this header.
	class half
	{
	public:
		half() = default;
		explicit half(float f);

		operator float() const;

		half &operator=(half h);
		half &operator=(float f);

	private:
		unsigned short fp16i;   // Raw 16-bit encoding
	};

	// Reinterprets the bits of 's' as a half; no numeric conversion takes place.
	inline half shortAsHalf(short s)
	{
		// half has a user-provided copy assignment and is therefore not
		// trivially copyable, so the bits are transferred through a union
		// rather than with memcpy.
		union
		{
			half h;
			short s;
		} hs;

		hs.s = s;

		return hs.h;
	}

	// Packed shared-exponent color: three 9-bit mantissas (R, G, B) that share
	// one 5-bit exponent (E). A channel decodes to mantissa * 2^(E - 15 - 9).
	class RGB9E5
	{
		unsigned int R : 9;
		unsigned int G : 9;
		unsigned int B : 9;
		unsigned int E : 5;

	public:
		// Encodes rgb[0..2] (red, green, blue).
		// Each channel is first clamped to [0, largest representable value].
		// A NaN channel clamps to that largest value, because std::min returns
		// its first argument whenever the comparison is false.
		RGB9E5(const float rgb[3])
		{
			// B is the exponent bias (15)
			constexpr int g_sharedexp_bias = 15;

			// N is the number of mantissa bits per component (9)
			constexpr int g_sharedexp_mantissabits = 9;

			// Emax is the maximum allowed biased exponent value (31)
			constexpr int g_sharedexp_maxexponent = 31;

			// Largest representable value: (2^N - 1) / 2^N * 2^(Emax - B)
			constexpr float g_sharedexp_max =
				((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) /
					static_cast<float>(1 << g_sharedexp_mantissabits)) *
				static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias));

			const float red_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[0]));
			const float green_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[1]));
			const float blue_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[2]));

			const float max_c = std::max(std::max(red_c, green_c), blue_c);

			// Preliminary shared exponent. The logarithm must be base 2: a natural
			// logarithm under-estimates the exponent of large values, which makes
			// the 9-bit mantissas overflow. log2(0) is -infinity, which the clamp
			// maps to the smallest exponent. Evaluated in double so values just
			// below a power of two are not rounded up to it.
			const double log2_floor = std::floor(std::log2(static_cast<double>(max_c)));
			const int exp_p =
				static_cast<int>(std::max(-(g_sharedexp_bias + 1.0), log2_floor)) + 1 + g_sharedexp_bias;

			// Rounding the largest channel may yield 2^N; in that case the shared
			// exponent has to be bumped by one so the mantissa fits in N bits.
			const double scale_p =
				std::pow(2.0, static_cast<double>(exp_p - g_sharedexp_bias - g_sharedexp_mantissabits));
			const int max_s = static_cast<int>(std::floor(max_c / scale_p + 0.5));
			const int exp_s = (max_s < (1 << g_sharedexp_mantissabits)) ? exp_p : exp_p + 1;

			// Quantize every channel with the final shared exponent (round half up).
			const double scale =
				std::pow(2.0, static_cast<double>(exp_s - g_sharedexp_bias - g_sharedexp_mantissabits));
			const auto quantize = [scale](float channel) {
				return static_cast<unsigned int>(std::floor(channel / scale + 0.5));
			};

			R = quantize(red_c);
			G = quantize(green_c);
			B = quantize(blue_c);
			E = static_cast<unsigned int>(exp_s);
		}

		// Returns the packed 32-bit word: R in bits 0-8, G in bits 9-17,
		// B in bits 18-26 and E in bits 27-31.
		operator unsigned int() const
		{
			// The fields are packed explicitly instead of reading the object
			// through an unsigned int pointer. They are cast before shifting
			// because bit-fields narrower than int promote to (signed) int.
			return static_cast<unsigned int>(R) |
				(static_cast<unsigned int>(G) << 9) |
				(static_cast<unsigned int>(B) << 18) |
				(static_cast<unsigned int>(E) << 27);
		}

		// Decodes the three channels and stores them in rgb[0..2] as halfs.
		void toRGB16F(half rgb[3]) const
		{
			constexpr int offset = 24;   // Exponent bias (15) + number of mantissa bits per component (9) = 24

			// factor = 2^(E - offset)
			const float factor = (1u << E) * (1.0f / (1 << offset));
			rgb[0] = half(R * factor);
			rgb[1] = half(G * factor);
			rgb[2] = half(B * factor);
		}
	};

	// Packed unsigned floating-point color: two 11-bit floats (R, G) and one
	// 10-bit float (B). Each has a 5-bit exponent and no sign bit; the 11-bit
	// floats carry 6 mantissa bits, the 10-bit float carries 5.
	class R11G11B10F
	{
		unsigned int R : 11;
		unsigned int G : 11;
		unsigned int B : 10;

		// Widens a float11 to a half by padding the 6-bit mantissa to 10 bits.
		static half float11ToFloat16(unsigned short fp11)
		{
			return shortAsHalf(static_cast<short>(fp11 << 4));   // Sign bit 0
		}

		// Widens a float10 to a half by padding the 5-bit mantissa to 10 bits.
		static half float10ToFloat16(unsigned short fp10)
		{
			return shortAsHalf(static_cast<short>(fp10 << 5));   // Sign bit 0
		}

		// Converts a 32-bit float to an unsigned 11-bit float, rounding to
		// nearest even.
		//  - NaN stays NaN and +INF stays INF.
		//  - Negative inputs, including -INF, become 0.
		//  - Finite values above the largest float11 are clamped to it.
		//  - Values too small for a float11 denormal become 0.
		static unsigned short float32ToFloat11(float fp32)
		{
			static_assert(sizeof(float) == sizeof(unsigned int), "float is expected to be 32 bits wide");

			const unsigned int float32MantissaMask = 0x7FFFFF;
			const unsigned int float32ExponentMask = 0x7F800000;
			const unsigned int float32SignMask = 0x80000000;
			const unsigned int float32ValueMask = ~float32SignMask;
			const unsigned int float32ExponentFirstBit = 23;
			const unsigned int float32ExponentBias = 127;

			const unsigned short float11Max = 0x7BF;
			const unsigned short float11MantissaMask = 0x3F;
			const unsigned short float11ExponentMask = 0x7C0;
			const unsigned short float11BitMask = 0x7FF;
			// 127 - 14 = 113 is the float32 biased exponent of float32Minfloat11
			const unsigned int float11ExponentBias = 14;

			const unsigned int float32Maxfloat11 = 0x477E0000;   // Largest finite float11, as float32 bits
			const unsigned int float32Minfloat11 = 0x38800000;   // Smallest normalized float11 (2^-14)

			// Copy the bits out; dereferencing a casted pointer would violate strict aliasing.
			unsigned int float32Bits = 0;
			std::memcpy(&float32Bits, &fp32, sizeof(float32Bits));

			const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

			unsigned int float32Val = float32Bits & float32ValueMask;

			if((float32Val & float32ExponentMask) == float32ExponentMask)
			{
				// INF or NAN
				if((float32Val & float32MantissaMask) != 0)
				{
					// Fold the 23 mantissa bits into 6 so a NaN never collapses to INF
					return static_cast<unsigned short>(float11ExponentMask |
						(((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
							float11MantissaMask));
				}

				// -INF is clamped to 0 since float11 is positive only
				return float32Sign ? static_cast<unsigned short>(0) : float11ExponentMask;
			}

			if(float32Sign)
			{
				// float11 is positive only, so clamp to zero
				return 0;
			}

			if(float32Val > float32Maxfloat11)
			{
				// The number is too large to be represented as a float11, set to max
				return float11Max;
			}

			if(float32Val < float32Minfloat11)
			{
				// The number is too small to be represented as a normalized float11
				// Convert it to a denormalized value.
				const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
					(float32Val >> float32ExponentFirstBit);
				const unsigned int significand =
					(1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask);

				// The shift count can reach 113. Shifting a 32-bit value by 32 or
				// more is undefined, and every bit is shifted out anyway.
				float32Val = (shift < 32) ? (significand >> shift) : 0;
			}
			else
			{
				// Rebias the exponent to represent the value as a normalized float11
				float32Val += 0xC8000000;
			}

			// Drop the 17 low mantissa bits, rounding to nearest even
			return static_cast<unsigned short>(
				((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask);
		}

		// Converts a 32-bit float to an unsigned 10-bit float, rounding to
		// nearest even.
		//  - NaN stays NaN and +INF stays INF.
		//  - Negative inputs, including -INF, become 0.
		//  - Finite values above the largest float10 are clamped to it.
		//  - Values too small for a float10 denormal become 0.
		static unsigned short float32ToFloat10(float fp32)
		{
			static_assert(sizeof(float) == sizeof(unsigned int), "float is expected to be 32 bits wide");

			const unsigned int float32MantissaMask = 0x7FFFFF;
			const unsigned int float32ExponentMask = 0x7F800000;
			const unsigned int float32SignMask = 0x80000000;
			const unsigned int float32ValueMask = ~float32SignMask;
			const unsigned int float32ExponentFirstBit = 23;
			const unsigned int float32ExponentBias = 127;

			const unsigned short float10Max = 0x3DF;
			const unsigned short float10MantissaMask = 0x1F;
			const unsigned short float10ExponentMask = 0x3E0;
			const unsigned short float10BitMask = 0x3FF;
			// 127 - 14 = 113 is the float32 biased exponent of float32Minfloat10
			const unsigned int float10ExponentBias = 14;

			const unsigned int float32Maxfloat10 = 0x477C0000;   // Largest finite float10, as float32 bits
			const unsigned int float32Minfloat10 = 0x38800000;   // Smallest normalized float10 (2^-14)

			// Copy the bits out; dereferencing a casted pointer would violate strict aliasing.
			unsigned int float32Bits = 0;
			std::memcpy(&float32Bits, &fp32, sizeof(float32Bits));

			const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

			unsigned int float32Val = float32Bits & float32ValueMask;

			if((float32Val & float32ExponentMask) == float32ExponentMask)
			{
				// INF or NAN
				if((float32Val & float32MantissaMask) != 0)
				{
					// Fold the 23 mantissa bits into 5 so a NaN never collapses to INF.
					// Five 5-bit windows are needed to cover all of them; the >> 8
					// window covers bits 8-12, which the other four windows miss.
					return static_cast<unsigned short>(float10ExponentMask |
						(((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 8) |
							(float32Val >> 3) | (float32Val)) &
							float10MantissaMask));
				}

				// -INF is clamped to 0 since float10 is positive only
				return float32Sign ? static_cast<unsigned short>(0) : float10ExponentMask;
			}

			if(float32Sign)
			{
				// float10 is positive only, so clamp to zero
				return 0;
			}

			if(float32Val > float32Maxfloat10)
			{
				// The number is too large to be represented as a float10, set to max
				return float10Max;
			}

			if(float32Val < float32Minfloat10)
			{
				// The number is too small to be represented as a normalized float10
				// Convert it to a denormalized value.
				const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
					(float32Val >> float32ExponentFirstBit);
				const unsigned int significand =
					(1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask);

				// The shift count can reach 113. Shifting a 32-bit value by 32 or
				// more is undefined, and every bit is shifted out anyway.
				float32Val = (shift < 32) ? (significand >> shift) : 0;
			}
			else
			{
				// Rebias the exponent to represent the value as a normalized float10
				float32Val += 0xC8000000;
			}

			// Drop the 18 low mantissa bits, rounding to nearest even
			return static_cast<unsigned short>(
				((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask);
		}

	public:
		// Encodes rgb[0..2] (red, green, blue); see float32ToFloat11() and
		// float32ToFloat10() for how out-of-range values are handled.
		R11G11B10F(const float rgb[3])
		{
			R = float32ToFloat11(rgb[0]);
			G = float32ToFloat11(rgb[1]);
			B = float32ToFloat10(rgb[2]);
		}

		// Returns the packed 32-bit word: R in bits 0-10, G in bits 11-21
		// and B in bits 22-31.
		operator unsigned int() const
		{
			// The fields are packed explicitly instead of reading the object
			// through an unsigned int pointer. They are cast before shifting
			// because bit-fields narrower than int promote to (signed) int.
			return static_cast<unsigned int>(R) |
				(static_cast<unsigned int>(G) << 11) |
				(static_cast<unsigned int>(B) << 22);
		}

		// Widens the three channels to halfs and stores them in rgb[0..2].
		void toRGB16F(half rgb[3]) const
		{
			rgb[0] = float11ToFloat16(static_cast<unsigned short>(R));
			rgb[1] = float11ToFloat16(static_cast<unsigned short>(G));
			rgb[2] = float10ToFloat16(static_cast<unsigned short>(B));
		}
	};
}   // namespace sw
295 
296 #endif   // sw_Half_hpp
297