1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Constants.hpp" 16 17 #include "System/Math.hpp" 18 #include "System/Half.hpp" 19 20 #include <cstring> 21 22 namespace sw 23 { 24 Constants constants; 25 Constants()26 Constants::Constants() 27 { 28 static const unsigned int transposeBit0[16] = 29 { 30 0x00000000, 31 0x00000001, 32 0x00000010, 33 0x00000011, 34 0x00000100, 35 0x00000101, 36 0x00000110, 37 0x00000111, 38 0x00001000, 39 0x00001001, 40 0x00001010, 41 0x00001011, 42 0x00001100, 43 0x00001101, 44 0x00001110, 45 0x00001111 46 }; 47 48 static const unsigned int transposeBit1[16] = 49 { 50 0x00000000, 51 0x00000002, 52 0x00000020, 53 0x00000022, 54 0x00000200, 55 0x00000202, 56 0x00000220, 57 0x00000222, 58 0x00002000, 59 0x00002002, 60 0x00002020, 61 0x00002022, 62 0x00002200, 63 0x00002202, 64 0x00002220, 65 0x00002222 66 }; 67 68 static const unsigned int transposeBit2[16] = 69 { 70 0x00000000, 71 0x00000004, 72 0x00000040, 73 0x00000044, 74 0x00000400, 75 0x00000404, 76 0x00000440, 77 0x00000444, 78 0x00004000, 79 0x00004004, 80 0x00004040, 81 0x00004044, 82 0x00004400, 83 0x00004404, 84 0x00004440, 85 0x00004444 86 }; 87 88 memcpy(&this->transposeBit0, transposeBit0, sizeof(transposeBit0)); 89 memcpy(&this->transposeBit1, transposeBit1, sizeof(transposeBit1)); 90 memcpy(&this->transposeBit2, transposeBit2, sizeof(transposeBit2)); 91 92 static const ushort4 cWeight[17] = 93 { 94 {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}, // 0xFFFF / 1 = 0xFFFF 95 {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}, // 0xFFFF / 1 = 0xFFFF 96 {0x8000, 0x8000, 0x8000, 0x8000}, // 0xFFFF / 2 = 0x8000 97 {0x5555, 0x5555, 0x5555, 0x5555}, // 0xFFFF / 3 = 0x5555 98 {0x4000, 0x4000, 0x4000, 0x4000}, // 0xFFFF / 4 = 0x4000 99 {0x3333, 0x3333, 0x3333, 0x3333}, // 0xFFFF / 5 = 0x3333 100 {0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA}, // 0xFFFF / 6 = 0x2AAA 101 {0x2492, 0x2492, 0x2492, 0x2492}, // 0xFFFF / 7 = 0x2492 102 {0x2000, 0x2000, 0x2000, 0x2000}, // 0xFFFF / 8 = 0x2000 103 {0x1C71, 0x1C71, 0x1C71, 0x1C71}, // 0xFFFF / 9 = 0x1C71 104 {0x1999, 0x1999, 0x1999, 0x1999}, // 0xFFFF / 10 = 0x1999 105 {0x1745, 0x1745, 0x1745, 0x1745}, // 0xFFFF / 11 = 0x1745 106 {0x1555, 0x1555, 0x1555, 0x1555}, // 0xFFFF / 12 = 0x1555 107 {0x13B1, 0x13B1, 0x13B1, 0x13B1}, // 0xFFFF / 13 = 0x13B1 108 {0x1249, 0x1249, 0x1249, 0x1249}, // 0xFFFF / 14 = 0x1249 109 {0x1111, 0x1111, 0x1111, 0x1111}, // 0xFFFF / 15 = 0x1111 110 {0x1000, 0x1000, 0x1000, 0x1000}, // 0xFFFF / 16 = 0x1000 111 }; 112 113 static const float4 uvWeight[17] = 114 { 115 {1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f}, 116 {1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f}, 117 {1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f}, 118 {1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f}, 119 {1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f}, 120 {1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f}, 121 {1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f}, 122 {1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f}, 123 {1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f}, 124 {1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f}, 125 {1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f}, 126 {1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f}, 127 {1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f}, 128 {1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f}, 129 {1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f}, 130 {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, 131 {1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f}, 132 }; 133 134 static const float4 uvStart[17] = 135 { 136 {-0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f}, 137 {-0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f}, 138 {-1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f}, 139 {-2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f}, 140 {-3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f}, 141 {-4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f}, 142 {-5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f}, 143 {-6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f}, 144 {-7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f}, 145 {-8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f}, 146 {-9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f}, 147 {-10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f}, 148 {-11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f}, 149 {-12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f}, 150 {-13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f}, 151 {-14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f}, 152 {-15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f}, 153 }; 154 155 memcpy(&this->cWeight, cWeight, sizeof(cWeight)); 156 memcpy(&this->uvWeight, uvWeight, sizeof(uvWeight)); 157 memcpy(&this->uvStart, uvStart, sizeof(uvStart)); 158 159 static const unsigned int occlusionCount[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; 160 161 memcpy(&this->occlusionCount, &occlusionCount, sizeof(occlusionCount)); 162 163 for(int i = 0; i < 16; i++) 164 { 165 maskB4Q[i][0] = -(i >> 0 & 1); 166 maskB4Q[i][1] = -(i >> 1 & 1); 167 maskB4Q[i][2] = -(i >> 2 & 1); 168 maskB4Q[i][3] = -(i >> 3 & 1); 169 maskB4Q[i][4] = -(i >> 0 & 1); 170 maskB4Q[i][5] = -(i >> 1 & 1); 171 maskB4Q[i][6] = -(i >> 2 & 1); 172 maskB4Q[i][7] = -(i >> 3 & 1); 173 174 invMaskB4Q[i][0] = ~maskB4Q[i][0]; 175 invMaskB4Q[i][1] = ~maskB4Q[i][1]; 176 invMaskB4Q[i][2] = ~maskB4Q[i][2]; 177 invMaskB4Q[i][3] = ~maskB4Q[i][3]; 178 invMaskB4Q[i][4] = ~maskB4Q[i][4]; 179 invMaskB4Q[i][5] = ~maskB4Q[i][5]; 180 invMaskB4Q[i][6] = ~maskB4Q[i][6]; 181 invMaskB4Q[i][7] = ~maskB4Q[i][7]; 182 183 maskW4Q[i][0] = -(i >> 0 & 1); 184 maskW4Q[i][1] = -(i >> 1 & 1); 185 maskW4Q[i][2] = -(i >> 2 & 1); 186 maskW4Q[i][3] = -(i >> 3 & 1); 187 188 invMaskW4Q[i][0] = ~maskW4Q[i][0]; 189 invMaskW4Q[i][1] = ~maskW4Q[i][1]; 190 invMaskW4Q[i][2] = ~maskW4Q[i][2]; 191 invMaskW4Q[i][3] = ~maskW4Q[i][3]; 192 193 maskD4X[i][0] = -(i >> 0 & 1); 194 maskD4X[i][1] = -(i >> 1 & 1); 195 maskD4X[i][2] = -(i >> 2 & 1); 196 maskD4X[i][3] = -(i >> 3 & 1); 197 198 invMaskD4X[i][0] = ~maskD4X[i][0]; 199 invMaskD4X[i][1] = ~maskD4X[i][1]; 200 invMaskD4X[i][2] = ~maskD4X[i][2]; 201 invMaskD4X[i][3] = ~maskD4X[i][3]; 202 203 maskQ0Q[i] = -(i >> 0 & 1); 204 maskQ1Q[i] = -(i >> 1 & 1); 205 maskQ2Q[i] = -(i >> 2 & 1); 206 maskQ3Q[i] = -(i >> 3 & 1); 207 208 invMaskQ0Q[i] = ~maskQ0Q[i]; 209 invMaskQ1Q[i] = ~maskQ1Q[i]; 210 invMaskQ2Q[i] = ~maskQ2Q[i]; 211 invMaskQ3Q[i] = ~maskQ3Q[i]; 212 213 maskX0X[i][0] = maskX0X[i][1] = maskX0X[i][2] = maskX0X[i][3] = -(i >> 0 & 1); 214 maskX1X[i][0] = maskX1X[i][1] = maskX1X[i][2] = maskX1X[i][3] = -(i >> 1 & 1); 215 maskX2X[i][0] = maskX2X[i][1] = maskX2X[i][2] = maskX2X[i][3] = -(i >> 2 & 1); 216 maskX3X[i][0] = maskX3X[i][1] = maskX3X[i][2] = maskX3X[i][3] = -(i >> 3 & 1); 217 218 invMaskX0X[i][0] = invMaskX0X[i][1] = invMaskX0X[i][2] = invMaskX0X[i][3] = ~maskX0X[i][0]; 219 invMaskX1X[i][0] = invMaskX1X[i][1] = invMaskX1X[i][2] = invMaskX1X[i][3] = ~maskX1X[i][0]; 220 invMaskX2X[i][0] = invMaskX2X[i][1] = invMaskX2X[i][2] = invMaskX2X[i][3] = ~maskX2X[i][0]; 221 invMaskX3X[i][0] = invMaskX3X[i][1] = invMaskX3X[i][2] = invMaskX3X[i][3] = ~maskX3X[i][0]; 222 223 maskD01Q[i][0] = -(i >> 0 & 1); 224 maskD01Q[i][1] = -(i >> 1 & 1); 225 maskD23Q[i][0] = -(i >> 2 & 1); 226 maskD23Q[i][1] = -(i >> 3 & 1); 227 228 invMaskD01Q[i][0] = ~maskD01Q[i][0]; 229 invMaskD01Q[i][1] = ~maskD01Q[i][1]; 230 invMaskD23Q[i][0] = ~maskD23Q[i][0]; 231 invMaskD23Q[i][1] = ~maskD23Q[i][1]; 232 233 maskQ01X[i][0] = -(i >> 0 & 1); 234 maskQ01X[i][1] = -(i >> 1 & 1); 235 maskQ23X[i][0] = -(i >> 2 & 1); 236 maskQ23X[i][1] = -(i >> 3 & 1); 237 238 invMaskQ01X[i][0] = ~maskQ01X[i][0]; 239 invMaskQ01X[i][1] = ~maskQ01X[i][1]; 240 invMaskQ23X[i][0] = ~maskQ23X[i][0]; 241 invMaskQ23X[i][1] = ~maskQ23X[i][1]; 242 } 243 244 for(int i = 0; i < 8; i++) 245 { 246 mask565Q[i][0] = 247 mask565Q[i][1] = 248 mask565Q[i][2] = 249 mask565Q[i][3] = (i & 0x1 ? 0x001F : 0) | (i & 0x2 ? 0x07E0 : 0) | (i & 0x4 ? 0xF800 : 0); 250 } 251 252 for(int i = 0; i < 4; i++) 253 { 254 maskW01Q[i][0] = -(i >> 0 & 1); 255 maskW01Q[i][1] = -(i >> 1 & 1); 256 maskW01Q[i][2] = -(i >> 0 & 1); 257 maskW01Q[i][3] = -(i >> 1 & 1); 258 259 maskD01X[i][0] = -(i >> 0 & 1); 260 maskD01X[i][1] = -(i >> 1 & 1); 261 maskD01X[i][2] = -(i >> 0 & 1); 262 maskD01X[i][3] = -(i >> 1 & 1); 263 } 264 265 for(int i = 0; i < 256; i++) 266 { 267 sRGBtoLinear8_16[i] = (unsigned short)(sw::sRGBtoLinear((float)i / 0xFF) * 0xFFFF + 0.5f); 268 } 269 270 for(int i = 0; i < 64; i++) 271 { 272 sRGBtoLinear6_16[i] = (unsigned short)(sw::sRGBtoLinear((float)i / 0x3F) * 0xFFFF + 0.5f); 273 } 274 275 for(int i = 0; i < 32; i++) 276 { 277 sRGBtoLinear5_16[i] = (unsigned short)(sw::sRGBtoLinear((float)i / 0x1F) * 0xFFFF + 0.5f); 278 } 279 280 for(int i = 0; i < 0x1000; i++) 281 { 282 linearToSRGB12_16[i] = (unsigned short)(clamp(sw::linearToSRGB((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF)); 283 sRGBtoLinear12_16[i] = (unsigned short)(clamp(sw::sRGBtoLinear((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF)); 284 } 285 286 for(int q = 0; q < 4; q++) 287 { 288 for(int c = 0; c < 16; c++) 289 { 290 for(int i = 0; i < 4; i++) 291 { 292 const float X[4] = {+0.3125f, -0.3125f, -0.1250f, +0.1250f}; 293 const float Y[4] = {+0.1250f, -0.1250f, +0.3125f, -0.3125f}; 294 295 sampleX[q][c][i] = c & (1 << i) ? X[q] : 0.0f; 296 sampleY[q][c][i] = c & (1 << i) ? Y[q] : 0.0f; 297 weight[c][i] = c & (1 << i) ? 1.0f : 0.0f; 298 } 299 } 300 } 301 302 const int Xf[4] = {-5, +5, +2, -2}; // Fragment offsets 303 const int Yf[4] = {-2, +2, -5, +5}; // Fragment offsets 304 305 memcpy(&this->Xf, &Xf, sizeof(Xf)); 306 memcpy(&this->Yf, &Yf, sizeof(Yf)); 307 308 static const float4 X[4] = {{-0.3125f, -0.3125f, -0.3125f, -0.3125f}, 309 {+0.3125f, +0.3125f, +0.3125f, +0.3125f}, 310 {+0.1250f, +0.1250f, +0.1250f, +0.1250f}, 311 {-0.1250f, -0.1250f, -0.1250f, -0.1250f}}; 312 313 static const float4 Y[4] = {{-0.1250f, -0.1250f, -0.1250f, -0.1250f}, 314 {+0.1250f, +0.1250f, +0.1250f, +0.1250f}, 315 {-0.3125f, -0.3125f, -0.3125f, -0.3125f}, 316 {+0.3125f, +0.3125f, +0.3125f, +0.3125f}}; 317 318 memcpy(&this->X, &X, sizeof(X)); 319 memcpy(&this->Y, &Y, sizeof(Y)); 320 321 const dword maxX[16] = {0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101}; 322 const dword maxY[16] = {0x00000000, 0x00000002, 0x00000200, 0x00000202, 0x00020000, 0x00020002, 0x00020200, 0x00020202, 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02020000, 0x02020002, 0x02020200, 0x02020202}; 323 const dword maxZ[16] = {0x00000000, 0x00000004, 0x00000400, 0x00000404, 0x00040000, 0x00040004, 0x00040400, 0x00040404, 0x04000000, 0x04000004, 0x04000400, 0x04000404, 0x04040000, 0x04040004, 0x04040400, 0x04040404}; 324 const dword minX[16] = {0x00000000, 0x00000008, 0x00000800, 0x00000808, 0x00080000, 0x00080008, 0x00080800, 0x00080808, 0x08000000, 0x08000008, 0x08000800, 0x08000808, 0x08080000, 0x08080008, 0x08080800, 0x08080808}; 325 const dword minY[16] = {0x00000000, 0x00000010, 0x00001000, 0x00001010, 0x00100000, 0x00100010, 0x00101000, 0x00101010, 0x10000000, 0x10000010, 0x10001000, 0x10001010, 0x10100000, 0x10100010, 0x10101000, 0x10101010}; 326 const dword minZ[16] = {0x00000000, 0x00000020, 0x00002000, 0x00002020, 0x00200000, 0x00200020, 0x00202000, 0x00202020, 0x20000000, 0x20000020, 0x20002000, 0x20002020, 0x20200000, 0x20200020, 0x20202000, 0x20202020}; 327 const dword fini[16] = {0x00000000, 0x00000080, 0x00008000, 0x00008080, 0x00800000, 0x00800080, 0x00808000, 0x00808080, 0x80000000, 0x80000080, 0x80008000, 0x80008080, 0x80800000, 0x80800080, 0x80808000, 0x80808080}; 328 329 memcpy(&this->maxX, &maxX, sizeof(maxX)); 330 memcpy(&this->maxY, &maxY, sizeof(maxY)); 331 memcpy(&this->maxZ, &maxZ, sizeof(maxZ)); 332 memcpy(&this->minX, &minX, sizeof(minX)); 333 memcpy(&this->minY, &minY, sizeof(minY)); 334 memcpy(&this->minZ, &minZ, sizeof(minZ)); 335 memcpy(&this->fini, &fini, sizeof(fini)); 336 337 static const dword4 maxPos = {0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFE}; 338 339 memcpy(&this->maxPos, &maxPos, sizeof(maxPos)); 340 341 static const float4 unscaleByte = {1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF}; 342 static const float4 unscaleSByte = {1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F}; 343 static const float4 unscaleShort = {1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF}; 344 static const float4 unscaleUShort = {1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF}; 345 static const float4 unscaleInt = {1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF}; 346 static const float4 unscaleUInt = {1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF}; 347 static const float4 unscaleFixed = {1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000}; 348 349 memcpy(&this->unscaleByte, &unscaleByte, sizeof(unscaleByte)); 350 memcpy(&this->unscaleSByte, &unscaleSByte, sizeof(unscaleSByte)); 351 memcpy(&this->unscaleShort, &unscaleShort, sizeof(unscaleShort)); 352 memcpy(&this->unscaleUShort, &unscaleUShort, sizeof(unscaleUShort)); 353 memcpy(&this->unscaleInt, &unscaleInt, sizeof(unscaleInt)); 354 memcpy(&this->unscaleUInt, &unscaleUInt, sizeof(unscaleUInt)); 355 memcpy(&this->unscaleFixed, &unscaleFixed, sizeof(unscaleFixed)); 356 357 for(int i = 0; i <= 0xFFFF; i++) 358 { 359 half2float[i] = (float)reinterpret_cast<half&>(i); 360 } 361 } 362 }