// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Constants.hpp"

#include "System/Math.hpp"
#include "System/Half.hpp"

#include <cstdint>
#include <cstring>

22 namespace sw
23 {
24 	Constants constants;
25 
Constants()26 	Constants::Constants()
27 	{
28 		static const unsigned int transposeBit0[16] =
29 		{
30 			0x00000000,
31 			0x00000001,
32 			0x00000010,
33 			0x00000011,
34 			0x00000100,
35 			0x00000101,
36 			0x00000110,
37 			0x00000111,
38 			0x00001000,
39 			0x00001001,
40 			0x00001010,
41 			0x00001011,
42 			0x00001100,
43 			0x00001101,
44 			0x00001110,
45 			0x00001111
46 		};
47 
48 		static const unsigned int transposeBit1[16] =
49 		{
50 			0x00000000,
51 			0x00000002,
52 			0x00000020,
53 			0x00000022,
54 			0x00000200,
55 			0x00000202,
56 			0x00000220,
57 			0x00000222,
58 			0x00002000,
59 			0x00002002,
60 			0x00002020,
61 			0x00002022,
62 			0x00002200,
63 			0x00002202,
64 			0x00002220,
65 			0x00002222
66 		};
67 
68 		static const unsigned int transposeBit2[16] =
69 		{
70 			0x00000000,
71 			0x00000004,
72 			0x00000040,
73 			0x00000044,
74 			0x00000400,
75 			0x00000404,
76 			0x00000440,
77 			0x00000444,
78 			0x00004000,
79 			0x00004004,
80 			0x00004040,
81 			0x00004044,
82 			0x00004400,
83 			0x00004404,
84 			0x00004440,
85 			0x00004444
86 		};
87 
88 		memcpy(&this->transposeBit0, transposeBit0, sizeof(transposeBit0));
89 		memcpy(&this->transposeBit1, transposeBit1, sizeof(transposeBit1));
90 		memcpy(&this->transposeBit2, transposeBit2, sizeof(transposeBit2));
91 
92 		static const ushort4 cWeight[17] =
93 		{
94 			{0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF},   // 0xFFFF / 1  = 0xFFFF
95 			{0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF},   // 0xFFFF / 1  = 0xFFFF
96 			{0x8000, 0x8000, 0x8000, 0x8000},   // 0xFFFF / 2  = 0x8000
97 			{0x5555, 0x5555, 0x5555, 0x5555},   // 0xFFFF / 3  = 0x5555
98 			{0x4000, 0x4000, 0x4000, 0x4000},   // 0xFFFF / 4  = 0x4000
99 			{0x3333, 0x3333, 0x3333, 0x3333},   // 0xFFFF / 5  = 0x3333
100 			{0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA},   // 0xFFFF / 6  = 0x2AAA
101 			{0x2492, 0x2492, 0x2492, 0x2492},   // 0xFFFF / 7  = 0x2492
102 			{0x2000, 0x2000, 0x2000, 0x2000},   // 0xFFFF / 8  = 0x2000
103 			{0x1C71, 0x1C71, 0x1C71, 0x1C71},   // 0xFFFF / 9  = 0x1C71
104 			{0x1999, 0x1999, 0x1999, 0x1999},   // 0xFFFF / 10 = 0x1999
105 			{0x1745, 0x1745, 0x1745, 0x1745},   // 0xFFFF / 11 = 0x1745
106 			{0x1555, 0x1555, 0x1555, 0x1555},   // 0xFFFF / 12 = 0x1555
107 			{0x13B1, 0x13B1, 0x13B1, 0x13B1},   // 0xFFFF / 13 = 0x13B1
108 			{0x1249, 0x1249, 0x1249, 0x1249},   // 0xFFFF / 14 = 0x1249
109 			{0x1111, 0x1111, 0x1111, 0x1111},   // 0xFFFF / 15 = 0x1111
110 			{0x1000, 0x1000, 0x1000, 0x1000},   // 0xFFFF / 16 = 0x1000
111 		};
112 
113 		static const float4 uvWeight[17] =
114 		{
115 			{1.0f / 1.0f,  1.0f / 1.0f,  1.0f / 1.0f,  1.0f / 1.0f},
116 			{1.0f / 1.0f,  1.0f / 1.0f,  1.0f / 1.0f,  1.0f / 1.0f},
117 			{1.0f / 2.0f,  1.0f / 2.0f,  1.0f / 2.0f,  1.0f / 2.0f},
118 			{1.0f / 3.0f,  1.0f / 3.0f,  1.0f / 3.0f,  1.0f / 3.0f},
119 			{1.0f / 4.0f,  1.0f / 4.0f,  1.0f / 4.0f,  1.0f / 4.0f},
120 			{1.0f / 5.0f,  1.0f / 5.0f,  1.0f / 5.0f,  1.0f / 5.0f},
121 			{1.0f / 6.0f,  1.0f / 6.0f,  1.0f / 6.0f,  1.0f / 6.0f},
122 			{1.0f / 7.0f,  1.0f / 7.0f,  1.0f / 7.0f,  1.0f / 7.0f},
123 			{1.0f / 8.0f,  1.0f / 8.0f,  1.0f / 8.0f,  1.0f / 8.0f},
124 			{1.0f / 9.0f,  1.0f / 9.0f,  1.0f / 9.0f,  1.0f / 9.0f},
125 			{1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f},
126 			{1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f},
127 			{1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f},
128 			{1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f},
129 			{1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f},
130 			{1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f},
131 			{1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f},
132 		};
133 
134 		static const float4 uvStart[17] =
135 		{
136 			{-0.0f / 2.0f,   -0.0f / 2.0f,   -0.0f / 2.0f,   -0.0f / 2.0f},
137 			{-0.0f / 2.0f,   -0.0f / 2.0f,   -0.0f / 2.0f,   -0.0f / 2.0f},
138 			{-1.0f / 4.0f,   -1.0f / 4.0f,   -1.0f / 4.0f,   -1.0f / 4.0f},
139 			{-2.0f / 6.0f,   -2.0f / 6.0f,   -2.0f / 6.0f,   -2.0f / 6.0f},
140 			{-3.0f / 8.0f,   -3.0f / 8.0f,   -3.0f / 8.0f,   -3.0f / 8.0f},
141 			{-4.0f / 10.0f,  -4.0f / 10.0f,  -4.0f / 10.0f,  -4.0f / 10.0f},
142 			{-5.0f / 12.0f,  -5.0f / 12.0f,  -5.0f / 12.0f,  -5.0f / 12.0f},
143 			{-6.0f / 14.0f,  -6.0f / 14.0f,  -6.0f / 14.0f,  -6.0f / 14.0f},
144 			{-7.0f / 16.0f,  -7.0f / 16.0f,  -7.0f / 16.0f,  -7.0f / 16.0f},
145 			{-8.0f / 18.0f,  -8.0f / 18.0f,  -8.0f / 18.0f,  -8.0f / 18.0f},
146 			{-9.0f / 20.0f,  -9.0f / 20.0f,  -9.0f / 20.0f,  -9.0f / 20.0f},
147 			{-10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f},
148 			{-11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f},
149 			{-12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f},
150 			{-13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f},
151 			{-14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f},
152 			{-15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f},
153 		};
154 
155 		memcpy(&this->cWeight, cWeight, sizeof(cWeight));
156 		memcpy(&this->uvWeight, uvWeight, sizeof(uvWeight));
157 		memcpy(&this->uvStart, uvStart, sizeof(uvStart));
158 
159 		static const unsigned int occlusionCount[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
160 
161 		memcpy(&this->occlusionCount, &occlusionCount, sizeof(occlusionCount));
162 
163 		for(int i = 0; i < 16; i++)
164 		{
165 			maskB4Q[i][0] = -(i >> 0 & 1);
166 			maskB4Q[i][1] = -(i >> 1 & 1);
167 			maskB4Q[i][2] = -(i >> 2 & 1);
168 			maskB4Q[i][3] = -(i >> 3 & 1);
169 			maskB4Q[i][4] = -(i >> 0 & 1);
170 			maskB4Q[i][5] = -(i >> 1 & 1);
171 			maskB4Q[i][6] = -(i >> 2 & 1);
172 			maskB4Q[i][7] = -(i >> 3 & 1);
173 
174 			invMaskB4Q[i][0] = ~maskB4Q[i][0];
175 			invMaskB4Q[i][1] = ~maskB4Q[i][1];
176 			invMaskB4Q[i][2] = ~maskB4Q[i][2];
177 			invMaskB4Q[i][3] = ~maskB4Q[i][3];
178 			invMaskB4Q[i][4] = ~maskB4Q[i][4];
179 			invMaskB4Q[i][5] = ~maskB4Q[i][5];
180 			invMaskB4Q[i][6] = ~maskB4Q[i][6];
181 			invMaskB4Q[i][7] = ~maskB4Q[i][7];
182 
183 			maskW4Q[i][0] = -(i >> 0 & 1);
184 			maskW4Q[i][1] = -(i >> 1 & 1);
185 			maskW4Q[i][2] = -(i >> 2 & 1);
186 			maskW4Q[i][3] = -(i >> 3 & 1);
187 
188 			invMaskW4Q[i][0] = ~maskW4Q[i][0];
189 			invMaskW4Q[i][1] = ~maskW4Q[i][1];
190 			invMaskW4Q[i][2] = ~maskW4Q[i][2];
191 			invMaskW4Q[i][3] = ~maskW4Q[i][3];
192 
193 			maskD4X[i][0] = -(i >> 0 & 1);
194 			maskD4X[i][1] = -(i >> 1 & 1);
195 			maskD4X[i][2] = -(i >> 2 & 1);
196 			maskD4X[i][3] = -(i >> 3 & 1);
197 
198 			invMaskD4X[i][0] = ~maskD4X[i][0];
199 			invMaskD4X[i][1] = ~maskD4X[i][1];
200 			invMaskD4X[i][2] = ~maskD4X[i][2];
201 			invMaskD4X[i][3] = ~maskD4X[i][3];
202 
203 			maskQ0Q[i] = -(i >> 0 & 1);
204 			maskQ1Q[i] = -(i >> 1 & 1);
205 			maskQ2Q[i] = -(i >> 2 & 1);
206 			maskQ3Q[i] = -(i >> 3 & 1);
207 
208 			invMaskQ0Q[i] = ~maskQ0Q[i];
209 			invMaskQ1Q[i] = ~maskQ1Q[i];
210 			invMaskQ2Q[i] = ~maskQ2Q[i];
211 			invMaskQ3Q[i] = ~maskQ3Q[i];
212 
213 			maskX0X[i][0] = maskX0X[i][1] = maskX0X[i][2] = maskX0X[i][3] = -(i >> 0 & 1);
214 			maskX1X[i][0] = maskX1X[i][1] = maskX1X[i][2] = maskX1X[i][3] = -(i >> 1 & 1);
215 			maskX2X[i][0] = maskX2X[i][1] = maskX2X[i][2] = maskX2X[i][3] = -(i >> 2 & 1);
216 			maskX3X[i][0] = maskX3X[i][1] = maskX3X[i][2] = maskX3X[i][3] = -(i >> 3 & 1);
217 
218 			invMaskX0X[i][0] = invMaskX0X[i][1] = invMaskX0X[i][2] = invMaskX0X[i][3] = ~maskX0X[i][0];
219 			invMaskX1X[i][0] = invMaskX1X[i][1] = invMaskX1X[i][2] = invMaskX1X[i][3] = ~maskX1X[i][0];
220 			invMaskX2X[i][0] = invMaskX2X[i][1] = invMaskX2X[i][2] = invMaskX2X[i][3] = ~maskX2X[i][0];
221 			invMaskX3X[i][0] = invMaskX3X[i][1] = invMaskX3X[i][2] = invMaskX3X[i][3] = ~maskX3X[i][0];
222 
223 			maskD01Q[i][0] = -(i >> 0 & 1);
224 			maskD01Q[i][1] = -(i >> 1 & 1);
225 			maskD23Q[i][0] = -(i >> 2 & 1);
226 			maskD23Q[i][1] = -(i >> 3 & 1);
227 
228 			invMaskD01Q[i][0] = ~maskD01Q[i][0];
229 			invMaskD01Q[i][1] = ~maskD01Q[i][1];
230 			invMaskD23Q[i][0] = ~maskD23Q[i][0];
231 			invMaskD23Q[i][1] = ~maskD23Q[i][1];
232 
233 			maskQ01X[i][0] = -(i >> 0 & 1);
234 			maskQ01X[i][1] = -(i >> 1 & 1);
235 			maskQ23X[i][0] = -(i >> 2 & 1);
236 			maskQ23X[i][1] = -(i >> 3 & 1);
237 
238 			invMaskQ01X[i][0] = ~maskQ01X[i][0];
239 			invMaskQ01X[i][1] = ~maskQ01X[i][1];
240 			invMaskQ23X[i][0] = ~maskQ23X[i][0];
241 			invMaskQ23X[i][1] = ~maskQ23X[i][1];
242 		}
243 
244 		for(int i = 0; i < 8; i++)
245 		{
246 			mask565Q[i][0] =
247 			mask565Q[i][1] =
248 			mask565Q[i][2] =
249 			mask565Q[i][3] = (i & 0x1 ? 0x001F : 0) | (i & 0x2 ? 0x07E0 : 0) | (i & 0x4 ? 0xF800 : 0);
250 		}
251 
252 		for(int i = 0; i < 4; i++)
253 		{
254 			maskW01Q[i][0] =  -(i >> 0 & 1);
255 			maskW01Q[i][1] =  -(i >> 1 & 1);
256 			maskW01Q[i][2] =  -(i >> 0 & 1);
257 			maskW01Q[i][3] =  -(i >> 1 & 1);
258 
259 			maskD01X[i][0] =  -(i >> 0 & 1);
260 			maskD01X[i][1] =  -(i >> 1 & 1);
261 			maskD01X[i][2] =  -(i >> 0 & 1);
262 			maskD01X[i][3] =  -(i >> 1 & 1);
263 		}
264 
265 		for(int i = 0; i < 256; i++)
266 		{
267 			sRGBtoLinear8_16[i] = (unsigned short)(sw::sRGBtoLinear((float)i / 0xFF) * 0xFFFF + 0.5f);
268 		}
269 
270 		for(int i = 0; i < 64; i++)
271 		{
272 			sRGBtoLinear6_16[i] = (unsigned short)(sw::sRGBtoLinear((float)i / 0x3F) * 0xFFFF + 0.5f);
273 		}
274 
275 		for(int i = 0; i < 32; i++)
276 		{
277 			sRGBtoLinear5_16[i] = (unsigned short)(sw::sRGBtoLinear((float)i / 0x1F) * 0xFFFF + 0.5f);
278 		}
279 
280 		for(int i = 0; i < 0x1000; i++)
281 		{
282 			linearToSRGB12_16[i] = (unsigned short)(clamp(sw::linearToSRGB((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF));
283 			sRGBtoLinear12_16[i] = (unsigned short)(clamp(sw::sRGBtoLinear((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF));
284 		}
285 
286 		for(int q = 0; q < 4; q++)
287 		{
288 			for(int c = 0; c < 16; c++)
289 			{
290 				for(int i = 0; i < 4; i++)
291 				{
292 					const float X[4] = {+0.3125f, -0.3125f, -0.1250f, +0.1250f};
293 					const float Y[4] = {+0.1250f, -0.1250f, +0.3125f, -0.3125f};
294 
295 					sampleX[q][c][i] = c & (1 << i) ? X[q] : 0.0f;
296 					sampleY[q][c][i] = c & (1 << i) ? Y[q] : 0.0f;
297 					weight[c][i] = c & (1 << i) ? 1.0f : 0.0f;
298 				}
299 			}
300 		}
301 
302 		const int Xf[4] = {-5, +5, +2, -2};   // Fragment offsets
303 		const int Yf[4] = {-2, +2, -5, +5};   // Fragment offsets
304 
305 		memcpy(&this->Xf, &Xf, sizeof(Xf));
306 		memcpy(&this->Yf, &Yf, sizeof(Yf));
307 
308 		static const float4 X[4] = {{-0.3125f, -0.3125f, -0.3125f, -0.3125f},
309 					                {+0.3125f, +0.3125f, +0.3125f, +0.3125f},
310 					                {+0.1250f, +0.1250f, +0.1250f, +0.1250f},
311 					                {-0.1250f, -0.1250f, -0.1250f, -0.1250f}};
312 
313 		static const float4 Y[4] = {{-0.1250f, -0.1250f, -0.1250f, -0.1250f},
314 		                            {+0.1250f, +0.1250f, +0.1250f, +0.1250f},
315 		                            {-0.3125f, -0.3125f, -0.3125f, -0.3125f},
316 		                            {+0.3125f, +0.3125f, +0.3125f, +0.3125f}};
317 
318 		memcpy(&this->X, &X, sizeof(X));
319 		memcpy(&this->Y, &Y, sizeof(Y));
320 
321 		const dword maxX[16] = {0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101};
322 		const dword maxY[16] = {0x00000000, 0x00000002, 0x00000200, 0x00000202, 0x00020000, 0x00020002, 0x00020200, 0x00020202, 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02020000, 0x02020002, 0x02020200, 0x02020202};
323 		const dword maxZ[16] = {0x00000000, 0x00000004, 0x00000400, 0x00000404, 0x00040000, 0x00040004, 0x00040400, 0x00040404, 0x04000000, 0x04000004, 0x04000400, 0x04000404, 0x04040000, 0x04040004, 0x04040400, 0x04040404};
324 		const dword minX[16] = {0x00000000, 0x00000008, 0x00000800, 0x00000808, 0x00080000, 0x00080008, 0x00080800, 0x00080808, 0x08000000, 0x08000008, 0x08000800, 0x08000808, 0x08080000, 0x08080008, 0x08080800, 0x08080808};
325 		const dword minY[16] = {0x00000000, 0x00000010, 0x00001000, 0x00001010, 0x00100000, 0x00100010, 0x00101000, 0x00101010, 0x10000000, 0x10000010, 0x10001000, 0x10001010, 0x10100000, 0x10100010, 0x10101000, 0x10101010};
326 		const dword minZ[16] = {0x00000000, 0x00000020, 0x00002000, 0x00002020, 0x00200000, 0x00200020, 0x00202000, 0x00202020, 0x20000000, 0x20000020, 0x20002000, 0x20002020, 0x20200000, 0x20200020, 0x20202000, 0x20202020};
327 		const dword fini[16] = {0x00000000, 0x00000080, 0x00008000, 0x00008080, 0x00800000, 0x00800080, 0x00808000, 0x00808080, 0x80000000, 0x80000080, 0x80008000, 0x80008080, 0x80800000, 0x80800080, 0x80808000, 0x80808080};
328 
329 		memcpy(&this->maxX, &maxX, sizeof(maxX));
330 		memcpy(&this->maxY, &maxY, sizeof(maxY));
331 		memcpy(&this->maxZ, &maxZ, sizeof(maxZ));
332 		memcpy(&this->minX, &minX, sizeof(minX));
333 		memcpy(&this->minY, &minY, sizeof(minY));
334 		memcpy(&this->minZ, &minZ, sizeof(minZ));
335 		memcpy(&this->fini, &fini, sizeof(fini));
336 
337 		static const dword4 maxPos = {0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFE};
338 
339 		memcpy(&this->maxPos, &maxPos, sizeof(maxPos));
340 
341 		static const float4 unscaleByte = {1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF};
342 		static const float4 unscaleSByte = {1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F};
343 		static const float4 unscaleShort = {1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF};
344 		static const float4 unscaleUShort = {1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF};
345 		static const float4 unscaleInt = {1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF};
346 		static const float4 unscaleUInt = {1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF};
347 		static const float4 unscaleFixed = {1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000};
348 
349 		memcpy(&this->unscaleByte, &unscaleByte, sizeof(unscaleByte));
350 		memcpy(&this->unscaleSByte, &unscaleSByte, sizeof(unscaleSByte));
351 		memcpy(&this->unscaleShort, &unscaleShort, sizeof(unscaleShort));
352 		memcpy(&this->unscaleUShort, &unscaleUShort, sizeof(unscaleUShort));
353 		memcpy(&this->unscaleInt, &unscaleInt, sizeof(unscaleInt));
354 		memcpy(&this->unscaleUInt, &unscaleUInt, sizeof(unscaleUInt));
355 		memcpy(&this->unscaleFixed, &unscaleFixed, sizeof(unscaleFixed));
356 
357 		for(int i = 0; i <= 0xFFFF; i++)
358 		{
359 			half2float[i] = (float)reinterpret_cast<half&>(i);
360 		}
361 	}
362 }