1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "ShaderCore.hpp" 16 17 #include "Renderer/Renderer.hpp" 18 #include "Common/Debug.hpp" 19 20 #include <limits.h> 21 22 namespace sw 23 { 24 extern TranscendentalPrecision logPrecision; 25 extern TranscendentalPrecision expPrecision; 26 extern TranscendentalPrecision rcpPrecision; 27 extern TranscendentalPrecision rsqPrecision; 28 Vector4s()29 Vector4s::Vector4s() 30 { 31 } 32 Vector4s(unsigned short x,unsigned short y,unsigned short z,unsigned short w)33 Vector4s::Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w) 34 { 35 this->x = Short4(x); 36 this->y = Short4(y); 37 this->z = Short4(z); 38 this->w = Short4(w); 39 } 40 Vector4s(const Vector4s & rhs)41 Vector4s::Vector4s(const Vector4s &rhs) 42 { 43 x = rhs.x; 44 y = rhs.y; 45 z = rhs.z; 46 w = rhs.w; 47 } 48 operator =(const Vector4s & rhs)49 Vector4s &Vector4s::operator=(const Vector4s &rhs) 50 { 51 x = rhs.x; 52 y = rhs.y; 53 z = rhs.z; 54 w = rhs.w; 55 56 return *this; 57 } 58 operator [](int i)59 Short4 &Vector4s::operator[](int i) 60 { 61 switch(i) 62 { 63 case 0: return x; 64 case 1: return y; 65 case 2: return z; 66 case 3: return w; 67 } 68 69 return x; 70 } 71 Vector4i()72 Vector4i::Vector4i() 73 { 74 } 75 Vector4i(int x,int y,int z,int w)76 Vector4i::Vector4i(int x, int y, int z, int w) 77 { 78 this->x = Int4(x); 79 this->y = Int4(y); 80 this->z = Int4(z); 81 this->w = Int4(w); 82 } 83 Vector4i(const Vector4i & rhs)84 Vector4i::Vector4i(const Vector4i &rhs) 85 { 86 x = rhs.x; 87 y = rhs.y; 88 z = rhs.z; 89 w = rhs.w; 90 } 91 operator =(const Vector4i & rhs)92 Vector4i &Vector4i::operator=(const Vector4i &rhs) 93 { 94 x = rhs.x; 95 y = rhs.y; 96 z = rhs.z; 97 w = rhs.w; 98 99 return *this; 100 } 101 operator [](int i)102 Int4 &Vector4i::operator[](int i) 103 { 104 switch(i) 105 { 106 case 0: return x; 107 case 1: return y; 108 case 2: return z; 109 case 3: return w; 110 } 111 112 return x; 113 } 114 Vector4u()115 Vector4u::Vector4u() 116 { 117 } 118 Vector4u(unsigned int x,unsigned int y,unsigned int z,unsigned int w)119 Vector4u::Vector4u(unsigned int x, unsigned int y, unsigned int z, unsigned int w) 120 { 121 this->x = UInt4(x); 122 this->y = UInt4(y); 123 this->z = UInt4(z); 124 this->w = UInt4(w); 125 } 126 Vector4u(const Vector4u & rhs)127 Vector4u::Vector4u(const Vector4u &rhs) 128 { 129 x = rhs.x; 130 y = rhs.y; 131 z = rhs.z; 132 w = rhs.w; 133 } 134 operator =(const Vector4u & rhs)135 Vector4u &Vector4u::operator=(const Vector4u &rhs) 136 { 137 x = rhs.x; 138 y = rhs.y; 139 z = rhs.z; 140 w = rhs.w; 141 142 return *this; 143 } 144 operator [](int i)145 UInt4 &Vector4u::operator[](int i) 146 { 147 switch(i) 148 { 149 case 0: return x; 150 case 1: return y; 151 case 2: return z; 152 case 3: return w; 153 } 154 155 return x; 156 } 157 Vector4f()158 Vector4f::Vector4f() 159 { 160 } 161 Vector4f(float x,float y,float z,float w)162 Vector4f::Vector4f(float x, float y, float z, float w) 163 { 164 this->x = Float4(x); 165 this->y = Float4(y); 166 this->z = Float4(z); 167 this->w = Float4(w); 168 } 169 Vector4f(const Vector4f & rhs)170 Vector4f::Vector4f(const Vector4f &rhs) 171 { 172 x = rhs.x; 173 y = rhs.y; 174 z = rhs.z; 175 w = rhs.w; 176 } 177 operator =(const Vector4f & rhs)178 Vector4f &Vector4f::operator=(const Vector4f &rhs) 179 { 180 x = rhs.x; 181 y = rhs.y; 182 z = rhs.z; 183 w = rhs.w; 184 185 return *this; 186 } 187 operator [](int i)188 Float4 &Vector4f::operator[](int i) 189 { 190 switch(i) 191 { 192 case 0: return x; 193 case 1: return y; 194 case 2: return z; 195 case 3: return w; 196 } 197 198 return x; 199 } 200 exponential2(RValue<Float4> x,bool pp)201 Float4 exponential2(RValue<Float4> x, bool pp) 202 { 203 Float4 x0; 204 Float4 x1; 205 Int4 x2; 206 207 x0 = x; 208 209 x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f 210 x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f 211 x1 = x0; 212 x1 -= Float4(0.5f); 213 x2 = RoundInt(x1); 214 x1 = Float4(x2); 215 x2 += Int4(0x0000007F); // 127 216 x2 = x2 << 23; 217 x0 -= x1; 218 x1 = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f 219 x1 *= x0; 220 x1 += As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f 221 x1 *= x0; 222 x1 += As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f 223 x1 *= x0; 224 x1 += As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f 225 x1 *= x0; 226 x1 += As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f 227 x1 *= x0; 228 x1 += As<Float4>(Int4(0x3F7FFFFF)); // 9.9999994e-1f 229 x1 *= As<Float4>(x2); 230 231 return x1; 232 } 233 logarithm2(RValue<Float4> x,bool absolute,bool pp)234 Float4 logarithm2(RValue<Float4> x, bool absolute, bool pp) 235 { 236 Float4 x0; 237 Float4 x1; 238 Float4 x2; 239 Float4 x3; 240 241 x0 = x; 242 243 x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000)); 244 x1 = As<Float4>(As<UInt4>(x1) >> 8); 245 x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f))); 246 x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f; 247 x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f))); 248 249 x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f); 250 x3 = ((Float4(1.6618466e-2f) * x0 + Float4(2.0350508e-1f)) * x0 + Float4(2.7382900e-1f)) * x0 + Float4(4.0496687e-2f); 251 x2 /= x3; 252 253 x1 += (x0 - Float4(1.0f)) * x2; 254 255 return x1; 256 } 257 exponential(RValue<Float4> x,bool pp)258 Float4 exponential(RValue<Float4> x, bool pp) 259 { 260 // FIXME: Propagate the constant 261 return exponential2(Float4(1.44269541f) * x, pp); // 1/ln(2) 262 } 263 logarithm(RValue<Float4> x,bool absolute,bool pp)264 Float4 logarithm(RValue<Float4> x, bool absolute, bool pp) 265 { 266 // FIXME: Propagate the constant 267 return Float4(6.93147181e-1f) * logarithm2(x, absolute, pp); // ln(2) 268 } 269 power(RValue<Float4> x,RValue<Float4> y,bool pp)270 Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp) 271 { 272 Float4 log = logarithm2(x, true, pp); 273 log *= y; 274 return exponential2(log, pp); 275 } 276 reciprocal(RValue<Float4> x,bool pp,bool finite,bool exactAtPow2)277 Float4 reciprocal(RValue<Float4> x, bool pp, bool finite, bool exactAtPow2) 278 { 279 Float4 rcp; 280 281 if(!pp && rcpPrecision >= WHQL) 282 { 283 rcp = Float4(1.0f) / x; 284 } 285 else 286 { 287 rcp = Rcp_pp(x, exactAtPow2); 288 289 if(!pp) 290 { 291 rcp = (rcp + rcp) - (x * rcp * rcp); 292 } 293 } 294 295 if(finite) 296 { 297 int big = 0x7F7FFFFF; 298 rcp = Min(rcp, Float4((float&)big)); 299 } 300 301 return rcp; 302 } 303 reciprocalSquareRoot(RValue<Float4> x,bool absolute,bool pp)304 Float4 reciprocalSquareRoot(RValue<Float4> x, bool absolute, bool pp) 305 { 306 Float4 abs = x; 307 308 if(absolute) 309 { 310 abs = Abs(abs); 311 } 312 313 Float4 rsq; 314 315 if(!pp && rsqPrecision >= IEEE) 316 { 317 rsq = Float4(1.0f) / Sqrt(abs); 318 } 319 else 320 { 321 rsq = RcpSqrt_pp(abs); 322 323 if(!pp) 324 { 325 rsq = rsq * (Float4(3.0f) - rsq * rsq * abs) * Float4(0.5f); 326 } 327 } 328 329 int big = 0x7F7FFFFF; 330 rsq = Min(rsq, Float4((float&)big)); 331 332 return rsq; 333 } 334 modulo(RValue<Float4> x,RValue<Float4> y)335 Float4 modulo(RValue<Float4> x, RValue<Float4> y) 336 { 337 return x - y * Floor(x / y); 338 } 339 sine_pi(RValue<Float4> x,bool pp)340 Float4 sine_pi(RValue<Float4> x, bool pp) 341 { 342 const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2 343 const Float4 B = Float4(1.27323954e+0f); // 4/pi 344 const Float4 C = Float4(7.75160950e-1f); 345 const Float4 D = Float4(2.24839049e-1f); 346 347 // Parabola approximating sine 348 Float4 sin = x * (Abs(x) * A + B); 349 350 // Improve precision from 0.06 to 0.001 351 if(true) 352 { 353 sin = sin * (Abs(sin) * D + C); 354 } 355 356 return sin; 357 } 358 cosine_pi(RValue<Float4> x,bool pp)359 Float4 cosine_pi(RValue<Float4> x, bool pp) 360 { 361 // cos(x) = sin(x + pi/2) 362 Float4 y = x + Float4(1.57079632e+0f); 363 364 // Wrap around 365 y -= As<Float4>(CmpNLT(y, Float4(3.14159265e+0f)) & As<Int4>(Float4(6.28318530e+0f))); 366 367 return sine_pi(y, pp); 368 } 369 sine(RValue<Float4> x,bool pp)370 Float4 sine(RValue<Float4> x, bool pp) 371 { 372 // Reduce to [-0.5, 0.5] range 373 Float4 y = x * Float4(1.59154943e-1f); // 1/2pi 374 y = y - Round(y); 375 376 const Float4 A = Float4(-16.0f); 377 const Float4 B = Float4(8.0f); 378 const Float4 C = Float4(7.75160950e-1f); 379 const Float4 D = Float4(2.24839049e-1f); 380 381 // Parabola approximating sine 382 Float4 sin = y * (Abs(y) * A + B); 383 384 // Improve precision from 0.06 to 0.001 385 if(true) 386 { 387 sin = sin * (Abs(sin) * D + C); 388 } 389 390 return sin; 391 } 392 cosine(RValue<Float4> x,bool pp)393 Float4 cosine(RValue<Float4> x, bool pp) 394 { 395 // cos(x) = sin(x + pi/2) 396 Float4 y = x + Float4(1.57079632e+0f); 397 return sine(y, pp); 398 } 399 tangent(RValue<Float4> x,bool pp)400 Float4 tangent(RValue<Float4> x, bool pp) 401 { 402 return sine(x, pp) / cosine(x, pp); 403 } 404 arccos(RValue<Float4> x,bool pp)405 Float4 arccos(RValue<Float4> x, bool pp) 406 { 407 // pi/2 - arcsin(x) 408 return Float4(1.57079632e+0f) - arcsin(x); 409 } 410 arcsin(RValue<Float4> x,bool pp)411 Float4 arcsin(RValue<Float4> x, bool pp) 412 { 413 // x*(pi/2-sqrt(1-x*x)*pi/5) 414 return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x*x) * Float4(6.28318531e-1f)); 415 } 416 arctan(RValue<Float4> x,bool pp)417 Float4 arctan(RValue<Float4> x, bool pp) 418 { 419 Int4 O = CmpNLT(Abs(x), Float4(1.0f)); 420 Float4 y = As<Float4>(O & As<Int4>(Float4(1.0f) / x) | ~O & As<Int4>(x)); // FIXME: Vector select 421 422 // Approximation of atan in [-1..1] 423 Float4 theta = y * (Float4(-0.27f) * Abs(y) + Float4(1.05539816f)); 424 425 // +/-pi/2 depending on sign of x 426 Float4 sgnPi_2 = As<Float4>(As<Int4>(Float4(1.57079632e+0f)) ^ (As<Int4>(x) & Int4(0x80000000))); 427 428 theta = As<Float4>(O & As<Int4>(sgnPi_2 - theta) | ~O & As<Int4>(theta)); // FIXME: Vector select 429 430 return theta; 431 } 432 arctan(RValue<Float4> y,RValue<Float4> x,bool pp)433 Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp) 434 { 435 // Rotate to upper semicircle when in lower semicircle 436 Int4 S = CmpLT(y, Float4(0.0f)); 437 Float4 theta = As<Float4>(S & As<Int4>(Float4(-3.14159265e+0f))); // -pi 438 Float4 x0 = As<Float4>((As<Int4>(y) & Int4(0x80000000)) ^ As<Int4>(x)); 439 Float4 y0 = Abs(y); 440 441 // Rotate to right quadrant when in left quadrant 442 Int4 Q = CmpLT(x0, Float4(0.0f)); 443 theta += As<Float4>(Q & As<Int4>(Float4(1.57079632e+0f))); // pi/2 444 Float4 x1 = As<Float4>(Q & As<Int4>(y0) | ~Q & As<Int4>(x0)); // FIXME: Vector select 445 Float4 y1 = As<Float4>(Q & As<Int4>(-x0) | ~Q & As<Int4>(y0)); // FIXME: Vector select 446 447 // Rotate to first octant when in second octant 448 Int4 O = CmpNLT(y1, x1); 449 theta += As<Float4>(O & As<Int4>(Float4(7.85398163e-1f))); // pi/4 450 Float4 x2 = As<Float4>(O & As<Int4>(Float4(7.07106781e-1f) * x1 + Float4(7.07106781e-1f) * y1) | ~O & As<Int4>(x1)); // sqrt(2)/2 // FIXME: Vector select 451 Float4 y2 = As<Float4>(O & As<Int4>(Float4(7.07106781e-1f) * y1 - Float4(7.07106781e-1f) * x1) | ~O & As<Int4>(y1)); // FIXME: Vector select 452 453 // Approximation of atan in [0..1] 454 Float4 y_x = y2 / x2; 455 theta += y_x * (Float4(-0.27f) * y_x + Float4(1.05539816f)); 456 457 return theta; 458 } 459 sineh(RValue<Float4> x,bool pp)460 Float4 sineh(RValue<Float4> x, bool pp) 461 { 462 return (exponential(x, pp) - exponential(-x, pp)) * Float4(0.5f); 463 } 464 cosineh(RValue<Float4> x,bool pp)465 Float4 cosineh(RValue<Float4> x, bool pp) 466 { 467 return (exponential(x, pp) + exponential(-x, pp)) * Float4(0.5f); 468 } 469 tangenth(RValue<Float4> x,bool pp)470 Float4 tangenth(RValue<Float4> x, bool pp) 471 { 472 Float4 e_x = exponential(x, pp); 473 Float4 e_minus_x = exponential(-x, pp); 474 return (e_x - e_minus_x) / (e_x + e_minus_x); 475 } 476 arccosh(RValue<Float4> x,bool pp)477 Float4 arccosh(RValue<Float4> x, bool pp) 478 { 479 return logarithm(x + Sqrt(x + Float4(1.0f)) * Sqrt(x - Float4(1.0f)), pp); 480 } 481 arcsinh(RValue<Float4> x,bool pp)482 Float4 arcsinh(RValue<Float4> x, bool pp) 483 { 484 return logarithm(x + Sqrt(x * x + Float4(1.0f)), pp); 485 } 486 arctanh(RValue<Float4> x,bool pp)487 Float4 arctanh(RValue<Float4> x, bool pp) 488 { 489 return logarithm((Float4(1.0f) + x) / (Float4(1.0f) - x), pp) * Float4(0.5f); 490 } 491 dot2(const Vector4f & v0,const Vector4f & v1)492 Float4 dot2(const Vector4f &v0, const Vector4f &v1) 493 { 494 return v0.x * v1.x + v0.y * v1.y; 495 } 496 dot3(const Vector4f & v0,const Vector4f & v1)497 Float4 dot3(const Vector4f &v0, const Vector4f &v1) 498 { 499 return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; 500 } 501 dot4(const Vector4f & v0,const Vector4f & v1)502 Float4 dot4(const Vector4f &v0, const Vector4f &v1) 503 { 504 return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w; 505 } 506 transpose4x4(Short4 & row0,Short4 & row1,Short4 & row2,Short4 & row3)507 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3) 508 { 509 Int2 tmp0 = UnpackHigh(row0, row1); 510 Int2 tmp1 = UnpackHigh(row2, row3); 511 Int2 tmp2 = UnpackLow(row0, row1); 512 Int2 tmp3 = UnpackLow(row2, row3); 513 514 row0 = As<Short4>(UnpackLow(tmp2, tmp3)); 515 row1 = As<Short4>(UnpackHigh(tmp2, tmp3)); 516 row2 = As<Short4>(UnpackLow(tmp0, tmp1)); 517 row3 = As<Short4>(UnpackHigh(tmp0, tmp1)); 518 } 519 transpose4x4(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)520 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 521 { 522 Float4 tmp0 = UnpackLow(row0, row1); 523 Float4 tmp1 = UnpackLow(row2, row3); 524 Float4 tmp2 = UnpackHigh(row0, row1); 525 Float4 tmp3 = UnpackHigh(row2, row3); 526 527 row0 = Float4(tmp0.xy, tmp1.xy); 528 row1 = Float4(tmp0.zw, tmp1.zw); 529 row2 = Float4(tmp2.xy, tmp3.xy); 530 row3 = Float4(tmp2.zw, tmp3.zw); 531 } 532 transpose4x3(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)533 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 534 { 535 Float4 tmp0 = UnpackLow(row0, row1); 536 Float4 tmp1 = UnpackLow(row2, row3); 537 Float4 tmp2 = UnpackHigh(row0, row1); 538 Float4 tmp3 = UnpackHigh(row2, row3); 539 540 row0 = Float4(tmp0.xy, tmp1.xy); 541 row1 = Float4(tmp0.zw, tmp1.zw); 542 row2 = Float4(tmp2.xy, tmp3.xy); 543 } 544 transpose4x2(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)545 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 546 { 547 Float4 tmp0 = UnpackLow(row0, row1); 548 Float4 tmp1 = UnpackLow(row2, row3); 549 550 row0 = Float4(tmp0.xy, tmp1.xy); 551 row1 = Float4(tmp0.zw, tmp1.zw); 552 } 553 transpose4x1(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)554 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 555 { 556 Float4 tmp0 = UnpackLow(row0, row1); 557 Float4 tmp1 = UnpackLow(row2, row3); 558 559 row0 = Float4(tmp0.xy, tmp1.xy); 560 } 561 transpose2x4(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)562 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 563 { 564 row0 = UnpackLow(row0, row1); 565 row1 = Float4(row0.zw, row1.zw); 566 row2 = UnpackHigh(row0, row1); 567 row3 = Float4(row2.zw, row3.zw); 568 } 569 transpose2x4h(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)570 void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 571 { 572 row0 = UnpackLow(row2, row3); 573 row1 = Float4(row0.zw, row1.zw); 574 row2 = UnpackHigh(row2, row3); 575 row3 = Float4(row2.zw, row3.zw); 576 } 577 transpose4xN(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3,int N)578 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N) 579 { 580 switch(N) 581 { 582 case 1: transpose4x1(row0, row1, row2, row3); break; 583 case 2: transpose4x2(row0, row1, row2, row3); break; 584 case 3: transpose4x3(row0, row1, row2, row3); break; 585 case 4: transpose4x4(row0, row1, row2, row3); break; 586 } 587 } 588 mov(Vector4f & dst,const Vector4f & src,bool integerDestination)589 void ShaderCore::mov(Vector4f &dst, const Vector4f &src, bool integerDestination) 590 { 591 if(integerDestination) 592 { 593 dst.x = As<Float4>(RoundInt(src.x)); 594 dst.y = As<Float4>(RoundInt(src.y)); 595 dst.z = As<Float4>(RoundInt(src.z)); 596 dst.w = As<Float4>(RoundInt(src.w)); 597 } 598 else 599 { 600 dst = src; 601 } 602 } 603 neg(Vector4f & dst,const Vector4f & src)604 void ShaderCore::neg(Vector4f &dst, const Vector4f &src) 605 { 606 dst.x = -src.x; 607 dst.y = -src.y; 608 dst.z = -src.z; 609 dst.w = -src.w; 610 } 611 ineg(Vector4f & dst,const Vector4f & src)612 void ShaderCore::ineg(Vector4f &dst, const Vector4f &src) 613 { 614 dst.x = As<Float4>(-As<Int4>(src.x)); 615 dst.y = As<Float4>(-As<Int4>(src.y)); 616 dst.z = As<Float4>(-As<Int4>(src.z)); 617 dst.w = As<Float4>(-As<Int4>(src.w)); 618 } 619 f2b(Vector4f & dst,const Vector4f & src)620 void ShaderCore::f2b(Vector4f &dst, const Vector4f &src) 621 { 622 dst.x = As<Float4>(CmpNEQ(src.x, Float4(0.0f))); 623 dst.y = As<Float4>(CmpNEQ(src.y, Float4(0.0f))); 624 dst.z = As<Float4>(CmpNEQ(src.z, Float4(0.0f))); 625 dst.w = As<Float4>(CmpNEQ(src.w, Float4(0.0f))); 626 } 627 b2f(Vector4f & dst,const Vector4f & src)628 void ShaderCore::b2f(Vector4f &dst, const Vector4f &src) 629 { 630 dst.x = As<Float4>(As<Int4>(src.x) & As<Int4>(Float4(1.0f))); 631 dst.y = As<Float4>(As<Int4>(src.y) & As<Int4>(Float4(1.0f))); 632 dst.z = As<Float4>(As<Int4>(src.z) & As<Int4>(Float4(1.0f))); 633 dst.w = As<Float4>(As<Int4>(src.w) & As<Int4>(Float4(1.0f))); 634 } 635 f2i(Vector4f & dst,const Vector4f & src)636 void ShaderCore::f2i(Vector4f &dst, const Vector4f &src) 637 { 638 dst.x = As<Float4>(Int4(src.x)); 639 dst.y = As<Float4>(Int4(src.y)); 640 dst.z = As<Float4>(Int4(src.z)); 641 dst.w = As<Float4>(Int4(src.w)); 642 } 643 i2f(Vector4f & dst,const Vector4f & src)644 void ShaderCore::i2f(Vector4f &dst, const Vector4f &src) 645 { 646 dst.x = Float4(As<Int4>(src.x)); 647 dst.y = Float4(As<Int4>(src.y)); 648 dst.z = Float4(As<Int4>(src.z)); 649 dst.w = Float4(As<Int4>(src.w)); 650 } 651 f2u(Vector4f & dst,const Vector4f & src)652 void ShaderCore::f2u(Vector4f &dst, const Vector4f &src) 653 { 654 dst.x = As<Float4>(UInt4(src.x)); 655 dst.y = As<Float4>(UInt4(src.y)); 656 dst.z = As<Float4>(UInt4(src.z)); 657 dst.w = As<Float4>(UInt4(src.w)); 658 } 659 u2f(Vector4f & dst,const Vector4f & src)660 void ShaderCore::u2f(Vector4f &dst, const Vector4f &src) 661 { 662 dst.x = Float4(As<UInt4>(src.x)); 663 dst.y = Float4(As<UInt4>(src.y)); 664 dst.z = Float4(As<UInt4>(src.z)); 665 dst.w = Float4(As<UInt4>(src.w)); 666 } 667 i2b(Vector4f & dst,const Vector4f & src)668 void ShaderCore::i2b(Vector4f &dst, const Vector4f &src) 669 { 670 dst.x = As<Float4>(CmpNEQ(As<Int4>(src.x), Int4(0))); 671 dst.y = As<Float4>(CmpNEQ(As<Int4>(src.y), Int4(0))); 672 dst.z = As<Float4>(CmpNEQ(As<Int4>(src.z), Int4(0))); 673 dst.w = As<Float4>(CmpNEQ(As<Int4>(src.w), Int4(0))); 674 } 675 b2i(Vector4f & dst,const Vector4f & src)676 void ShaderCore::b2i(Vector4f &dst, const Vector4f &src) 677 { 678 dst.x = As<Float4>(As<Int4>(src.x) & Int4(1)); 679 dst.y = As<Float4>(As<Int4>(src.y) & Int4(1)); 680 dst.z = As<Float4>(As<Int4>(src.z) & Int4(1)); 681 dst.w = As<Float4>(As<Int4>(src.w) & Int4(1)); 682 } 683 add(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)684 void ShaderCore::add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 685 { 686 dst.x = src0.x + src1.x; 687 dst.y = src0.y + src1.y; 688 dst.z = src0.z + src1.z; 689 dst.w = src0.w + src1.w; 690 } 691 iadd(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)692 void ShaderCore::iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 693 { 694 dst.x = As<Float4>(As<Int4>(src0.x) + As<Int4>(src1.x)); 695 dst.y = As<Float4>(As<Int4>(src0.y) + As<Int4>(src1.y)); 696 dst.z = As<Float4>(As<Int4>(src0.z) + As<Int4>(src1.z)); 697 dst.w = As<Float4>(As<Int4>(src0.w) + As<Int4>(src1.w)); 698 } 699 sub(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)700 void ShaderCore::sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 701 { 702 dst.x = src0.x - src1.x; 703 dst.y = src0.y - src1.y; 704 dst.z = src0.z - src1.z; 705 dst.w = src0.w - src1.w; 706 } 707 isub(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)708 void ShaderCore::isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 709 { 710 dst.x = As<Float4>(As<Int4>(src0.x) - As<Int4>(src1.x)); 711 dst.y = As<Float4>(As<Int4>(src0.y) - As<Int4>(src1.y)); 712 dst.z = As<Float4>(As<Int4>(src0.z) - As<Int4>(src1.z)); 713 dst.w = As<Float4>(As<Int4>(src0.w) - As<Int4>(src1.w)); 714 } 715 mad(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)716 void ShaderCore::mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 717 { 718 dst.x = src0.x * src1.x + src2.x; 719 dst.y = src0.y * src1.y + src2.y; 720 dst.z = src0.z * src1.z + src2.z; 721 dst.w = src0.w * src1.w + src2.w; 722 } 723 imad(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)724 void ShaderCore::imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 725 { 726 dst.x = As<Float4>(As<Int4>(src0.x) * As<Int4>(src1.x) + As<Int4>(src2.x)); 727 dst.y = As<Float4>(As<Int4>(src0.y) * As<Int4>(src1.y) + As<Int4>(src2.y)); 728 dst.z = As<Float4>(As<Int4>(src0.z) * As<Int4>(src1.z) + As<Int4>(src2.z)); 729 dst.w = As<Float4>(As<Int4>(src0.w) * As<Int4>(src1.w) + As<Int4>(src2.w)); 730 } 731 mul(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)732 void ShaderCore::mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 733 { 734 dst.x = src0.x * src1.x; 735 dst.y = src0.y * src1.y; 736 dst.z = src0.z * src1.z; 737 dst.w = src0.w * src1.w; 738 } 739 imul(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)740 void ShaderCore::imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 741 { 742 dst.x = As<Float4>(As<Int4>(src0.x) * As<Int4>(src1.x)); 743 dst.y = As<Float4>(As<Int4>(src0.y) * As<Int4>(src1.y)); 744 dst.z = As<Float4>(As<Int4>(src0.z) * As<Int4>(src1.z)); 745 dst.w = As<Float4>(As<Int4>(src0.w) * As<Int4>(src1.w)); 746 } 747 rcpx(Vector4f & dst,const Vector4f & src,bool pp)748 void ShaderCore::rcpx(Vector4f &dst, const Vector4f &src, bool pp) 749 { 750 Float4 rcp = reciprocal(src.x, pp, true); 751 752 dst.x = rcp; 753 dst.y = rcp; 754 dst.z = rcp; 755 dst.w = rcp; 756 } 757 div(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)758 void ShaderCore::div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 759 { 760 dst.x = src0.x / src1.x; 761 dst.y = src0.y / src1.y; 762 dst.z = src0.z / src1.z; 763 dst.w = src0.w / src1.w; 764 } 765 idiv(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)766 void ShaderCore::idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 767 { 768 Float4 intMax(As<Float4>(Int4(INT_MAX))); 769 cmp0i(dst.x, src1.x, intMax, src1.x); 770 dst.x = As<Float4>(As<Int4>(src0.x) / As<Int4>(dst.x)); 771 cmp0i(dst.y, src1.y, intMax, src1.y); 772 dst.y = As<Float4>(As<Int4>(src0.y) / As<Int4>(dst.y)); 773 cmp0i(dst.z, src1.z, intMax, src1.z); 774 dst.z = As<Float4>(As<Int4>(src0.z) / As<Int4>(dst.z)); 775 cmp0i(dst.w, src1.w, intMax, src1.w); 776 dst.w = As<Float4>(As<Int4>(src0.w) / As<Int4>(dst.w)); 777 } 778 udiv(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)779 void ShaderCore::udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 780 { 781 Float4 uintMax(As<Float4>(UInt4(UINT_MAX))); 782 cmp0i(dst.x, src1.x, uintMax, src1.x); 783 dst.x = As<Float4>(As<UInt4>(src0.x) / As<UInt4>(dst.x)); 784 cmp0i(dst.y, src1.y, uintMax, src1.y); 785 dst.y = As<Float4>(As<UInt4>(src0.y) / As<UInt4>(dst.y)); 786 cmp0i(dst.z, src1.z, uintMax, src1.z); 787 dst.z = As<Float4>(As<UInt4>(src0.z) / As<UInt4>(dst.z)); 788 cmp0i(dst.w, src1.w, uintMax, src1.w); 789 dst.w = As<Float4>(As<UInt4>(src0.w) / As<UInt4>(dst.w)); 790 } 791 mod(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)792 void ShaderCore::mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 793 { 794 dst.x = modulo(src0.x, src1.x); 795 dst.y = modulo(src0.y, src1.y); 796 dst.z = modulo(src0.z, src1.z); 797 dst.w = modulo(src0.w, src1.w); 798 } 799 imod(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)800 void ShaderCore::imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 801 { 802 cmp0i(dst.x, src1.x, src0.x, src1.x); 803 dst.x = As<Float4>(As<Int4>(src0.x) % As<Int4>(dst.x)); 804 cmp0i(dst.y, src1.y, src0.y, src1.y); 805 dst.y = As<Float4>(As<Int4>(src0.y) % As<Int4>(dst.y)); 806 cmp0i(dst.z, src1.z, src0.z, src1.z); 807 dst.z = As<Float4>(As<Int4>(src0.z) % As<Int4>(dst.z)); 808 cmp0i(dst.w, src1.w, src0.w, src1.w); 809 dst.w = As<Float4>(As<Int4>(src0.w) % As<Int4>(dst.w)); 810 } umod(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)811 void ShaderCore::umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 812 { 813 cmp0i(dst.x, src1.x, src0.x, src1.x); 814 dst.x = As<Float4>(As<UInt4>(src0.x) % As<UInt4>(dst.x)); 815 cmp0i(dst.y, src1.y, src0.y, src1.y); 816 dst.y = As<Float4>(As<UInt4>(src0.y) % As<UInt4>(dst.y)); 817 cmp0i(dst.z, src1.z, src0.z, src1.z); 818 dst.z = As<Float4>(As<UInt4>(src0.z) % As<UInt4>(dst.z)); 819 cmp0i(dst.w, src1.w, src0.w, src1.w); 820 dst.w = As<Float4>(As<UInt4>(src0.w) % As<UInt4>(dst.w)); 821 } 822 shl(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)823 void ShaderCore::shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 824 { 825 dst.x = As<Float4>(As<Int4>(src0.x) << As<Int4>(src1.x)); 826 dst.y = As<Float4>(As<Int4>(src0.y) << As<Int4>(src1.y)); 827 dst.z = As<Float4>(As<Int4>(src0.z) << As<Int4>(src1.z)); 828 dst.w = As<Float4>(As<Int4>(src0.w) << As<Int4>(src1.w)); 829 } 830 ishr(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)831 void ShaderCore::ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 832 { 833 dst.x = As<Float4>(As<Int4>(src0.x) >> As<Int4>(src1.x)); 834 dst.y = As<Float4>(As<Int4>(src0.y) >> As<Int4>(src1.y)); 835 dst.z = As<Float4>(As<Int4>(src0.z) >> As<Int4>(src1.z)); 836 dst.w = As<Float4>(As<Int4>(src0.w) >> As<Int4>(src1.w)); 837 } 838 ushr(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)839 void ShaderCore::ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 840 { 841 dst.x = As<Float4>(As<UInt4>(src0.x) >> As<UInt4>(src1.x)); 842 dst.y = As<Float4>(As<UInt4>(src0.y) >> As<UInt4>(src1.y)); 843 dst.z = As<Float4>(As<UInt4>(src0.z) >> As<UInt4>(src1.z)); 844 dst.w = As<Float4>(As<UInt4>(src0.w) >> As<UInt4>(src1.w)); 845 } 846 rsqx(Vector4f & dst,const Vector4f & src,bool pp)847 void ShaderCore::rsqx(Vector4f &dst, const Vector4f &src, bool pp) 848 { 849 Float4 rsq = reciprocalSquareRoot(src.x, true, pp); 850 851 dst.x = rsq; 852 dst.y = rsq; 853 dst.z = rsq; 854 dst.w = rsq; 855 } 856 sqrt(Vector4f & dst,const Vector4f & src,bool pp)857 void ShaderCore::sqrt(Vector4f &dst, const Vector4f &src, bool pp) 858 { 859 dst.x = Sqrt(src.x); 860 dst.y = Sqrt(src.y); 861 dst.z = Sqrt(src.z); 862 dst.w = Sqrt(src.w); 863 } 864 rsq(Vector4f & dst,const Vector4f & src,bool pp)865 void ShaderCore::rsq(Vector4f &dst, const Vector4f &src, bool pp) 866 { 867 dst.x = reciprocalSquareRoot(src.x, false, pp); 868 dst.y = reciprocalSquareRoot(src.y, false, pp); 869 dst.z = reciprocalSquareRoot(src.z, false, pp); 870 dst.w = reciprocalSquareRoot(src.w, false, pp); 871 } 872 len2(Float4 & dst,const Vector4f & src,bool pp)873 void ShaderCore::len2(Float4 &dst, const Vector4f &src, bool pp) 874 { 875 dst = Sqrt(dot2(src, src)); 876 } 877 len3(Float4 & dst,const Vector4f & src,bool pp)878 void ShaderCore::len3(Float4 &dst, const Vector4f &src, bool pp) 879 { 880 dst = Sqrt(dot3(src, src)); 881 } 882 len4(Float4 & dst,const Vector4f & src,bool pp)883 void ShaderCore::len4(Float4 &dst, const Vector4f &src, bool pp) 884 { 885 dst = Sqrt(dot4(src, src)); 886 } 887 dist1(Float4 & dst,const Vector4f & src0,const Vector4f & src1,bool pp)888 void ShaderCore::dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 889 { 890 dst = Abs(src0.x - src1.x); 891 } 892 dist2(Float4 & dst,const Vector4f & src0,const Vector4f & src1,bool pp)893 void ShaderCore::dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 894 { 895 Float4 dx = src0.x - src1.x; 896 Float4 dy = src0.y - src1.y; 897 Float4 dot2 = dx * dx + dy * dy; 898 dst = Sqrt(dot2); 899 } 900 dist3(Float4 & dst,const Vector4f & src0,const Vector4f & src1,bool pp)901 void ShaderCore::dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 902 { 903 Float4 dx = src0.x - src1.x; 904 Float4 dy = src0.y - src1.y; 905 Float4 dz = src0.z - src1.z; 906 Float4 dot3 = dx * dx + dy * dy + dz * dz; 907 dst = Sqrt(dot3); 908 } 909 dist4(Float4 & dst,const Vector4f & src0,const Vector4f & src1,bool pp)910 void ShaderCore::dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 911 { 912 Float4 dx = src0.x - src1.x; 913 Float4 dy = src0.y - src1.y; 914 Float4 dz = src0.z - src1.z; 915 Float4 dw = src0.w - src1.w; 916 Float4 dot4 = dx * dx + dy * dy + dz * dz + dw * dw; 917 dst = Sqrt(dot4); 918 } 919 dp1(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)920 void ShaderCore::dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 921 { 922 Float4 t = src0.x * src1.x; 923 924 dst.x = t; 925 dst.y = t; 926 dst.z = t; 927 dst.w = t; 928 } 929 dp2(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)930 void ShaderCore::dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 931 { 932 Float4 t = dot2(src0, src1); 933 934 dst.x = t; 935 dst.y = t; 936 dst.z = t; 937 dst.w = t; 938 } 939 dp2add(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)940 void ShaderCore::dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 941 { 942 Float4 t = dot2(src0, src1) + src2.x; 943 944 dst.x = t; 945 dst.y = t; 946 dst.z = t; 947 dst.w = t; 948 } 949 dp3(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)950 void ShaderCore::dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 951 { 952 Float4 dot = dot3(src0, src1); 953 954 dst.x = dot; 955 dst.y = dot; 956 dst.z = dot; 957 dst.w = dot; 958 } 959 dp4(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)960 void ShaderCore::dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 961 { 962 Float4 dot = dot4(src0, src1); 963 964 dst.x = dot; 965 dst.y = dot; 966 dst.z = dot; 967 dst.w = dot; 968 } 969 min(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)970 void ShaderCore::min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 971 { 972 dst.x = Min(src0.x, src1.x); 973 dst.y = Min(src0.y, src1.y); 974 dst.z = Min(src0.z, src1.z); 975 dst.w = Min(src0.w, src1.w); 976 } 977 imin(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)978 void ShaderCore::imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 979 { 980 dst.x = As<Float4>(Min(As<Int4>(src0.x), As<Int4>(src1.x))); 981 dst.y = As<Float4>(Min(As<Int4>(src0.y), As<Int4>(src1.y))); 982 dst.z = As<Float4>(Min(As<Int4>(src0.z), As<Int4>(src1.z))); 983 dst.w = As<Float4>(Min(As<Int4>(src0.w), As<Int4>(src1.w))); 984 } 985 umin(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)986 void ShaderCore::umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 987 { 988 dst.x = As<Float4>(Min(As<UInt4>(src0.x), As<UInt4>(src1.x))); 989 dst.y = As<Float4>(Min(As<UInt4>(src0.y), As<UInt4>(src1.y))); 990 dst.z = As<Float4>(Min(As<UInt4>(src0.z), As<UInt4>(src1.z))); 991 dst.w = As<Float4>(Min(As<UInt4>(src0.w), As<UInt4>(src1.w))); 992 } 993 max(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)994 void ShaderCore::max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 995 { 996 dst.x = Max(src0.x, src1.x); 997 dst.y = Max(src0.y, src1.y); 998 dst.z = Max(src0.z, src1.z); 999 dst.w = Max(src0.w, src1.w); 1000 } 1001 imax(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1002 void ShaderCore::imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1003 { 1004 dst.x = As<Float4>(Max(As<Int4>(src0.x), As<Int4>(src1.x))); 1005 dst.y = As<Float4>(Max(As<Int4>(src0.y), As<Int4>(src1.y))); 1006 dst.z = As<Float4>(Max(As<Int4>(src0.z), As<Int4>(src1.z))); 1007 dst.w = As<Float4>(Max(As<Int4>(src0.w), As<Int4>(src1.w))); 1008 } 1009 umax(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1010 void ShaderCore::umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1011 { 1012 dst.x = As<Float4>(Max(As<Int4>(src0.x), As<Int4>(src1.x))); 1013 dst.y = As<Float4>(Max(As<Int4>(src0.y), As<Int4>(src1.y))); 1014 dst.z = As<Float4>(Max(As<Int4>(src0.z), As<Int4>(src1.z))); 1015 dst.w = As<Float4>(Max(As<Int4>(src0.w), As<Int4>(src1.w))); 1016 } 1017 slt(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1018 void ShaderCore::slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1019 { 1020 dst.x = As<Float4>(As<Int4>(CmpLT(src0.x, src1.x)) & As<Int4>(Float4(1.0f))); 1021 dst.y = As<Float4>(As<Int4>(CmpLT(src0.y, src1.y)) & As<Int4>(Float4(1.0f))); 1022 dst.z = As<Float4>(As<Int4>(CmpLT(src0.z, src1.z)) & As<Int4>(Float4(1.0f))); 1023 dst.w = As<Float4>(As<Int4>(CmpLT(src0.w, src1.w)) & As<Int4>(Float4(1.0f))); 1024 } 1025 step(Vector4f & dst,const Vector4f & edge,const Vector4f & x)1026 void ShaderCore::step(Vector4f &dst, const Vector4f &edge, const Vector4f &x) 1027 { 1028 dst.x = As<Float4>(CmpNLT(x.x, edge.x) & As<Int4>(Float4(1.0f))); 1029 dst.y = As<Float4>(CmpNLT(x.y, edge.y) & As<Int4>(Float4(1.0f))); 1030 dst.z = As<Float4>(CmpNLT(x.z, edge.z) & As<Int4>(Float4(1.0f))); 1031 dst.w = As<Float4>(CmpNLT(x.w, edge.w) & As<Int4>(Float4(1.0f))); 1032 } 1033 exp2x(Vector4f & dst,const Vector4f & src,bool pp)1034 void ShaderCore::exp2x(Vector4f &dst, const Vector4f &src, bool pp) 1035 { 1036 Float4 exp = exponential2(src.x, pp); 1037 1038 dst.x = exp; 1039 dst.y = exp; 1040 dst.z = exp; 1041 dst.w = exp; 1042 } 1043 exp2(Vector4f & dst,const Vector4f & src,bool pp)1044 void ShaderCore::exp2(Vector4f &dst, const Vector4f &src, bool pp) 1045 { 1046 dst.x = exponential2(src.x, pp); 1047 dst.y = exponential2(src.y, pp); 1048 dst.z = exponential2(src.z, pp); 1049 dst.w = exponential2(src.w, pp); 1050 } 1051 exp(Vector4f & dst,const Vector4f & src,bool pp)1052 void ShaderCore::exp(Vector4f &dst, const Vector4f &src, bool pp) 1053 { 1054 dst.x = exponential(src.x, pp); 1055 dst.y = exponential(src.y, pp); 1056 dst.z = exponential(src.z, pp); 1057 dst.w = exponential(src.w, pp); 1058 } 1059 log2x(Vector4f & dst,const Vector4f & src,bool pp)1060 void ShaderCore::log2x(Vector4f &dst, const Vector4f &src, bool pp) 1061 { 1062 Float4 log = logarithm2(src.x, true, pp); 1063 1064 dst.x = log; 1065 dst.y = log; 1066 dst.z = log; 1067 dst.w = log; 1068 } 1069 log2(Vector4f & dst,const Vector4f & src,bool pp)1070 void ShaderCore::log2(Vector4f &dst, const Vector4f &src, bool pp) 1071 { 1072 dst.x = logarithm2(src.x, pp); 1073 dst.y = logarithm2(src.y, pp); 1074 dst.z = logarithm2(src.z, pp); 1075 dst.w = logarithm2(src.w, pp); 1076 } 1077 log(Vector4f & dst,const Vector4f & src,bool pp)1078 void ShaderCore::log(Vector4f &dst, const Vector4f &src, bool pp) 1079 { 1080 dst.x = logarithm(src.x, false, pp); 1081 dst.y = logarithm(src.y, false, pp); 1082 dst.z = logarithm(src.z, false, pp); 1083 dst.w = logarithm(src.w, false, pp); 1084 } 1085 lit(Vector4f & dst,const Vector4f & src)1086 void ShaderCore::lit(Vector4f &dst, const Vector4f &src) 1087 { 1088 dst.x = Float4(1.0f); 1089 dst.y = Max(src.x, Float4(0.0f)); 1090 1091 Float4 pow; 1092 1093 pow = src.w; 1094 pow = Min(pow, Float4(127.9961f)); 1095 pow = Max(pow, Float4(-127.9961f)); 1096 1097 dst.z = power(src.y, pow); 1098 dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.x, Float4(0.0f))); 1099 dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.y, Float4(0.0f))); 1100 1101 dst.w = Float4(1.0f); 1102 } 1103 att(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1104 void ShaderCore::att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1105 { 1106 // Computes attenuation factors (1, d, d^2, 1/d) assuming src0 = d^2, src1 = 1/d 1107 dst.x = 1; 1108 dst.y = src0.y * src1.y; 1109 dst.z = src0.z; 1110 dst.w = src1.w; 1111 } 1112 lrp(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)1113 void ShaderCore::lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 1114 { 1115 dst.x = src0.x * (src1.x - src2.x) + src2.x; 1116 dst.y = src0.y * (src1.y - src2.y) + src2.y; 1117 dst.z = src0.z * (src1.z - src2.z) + src2.z; 1118 dst.w = src0.w * (src1.w - src2.w) + src2.w; 1119 } 1120 smooth(Vector4f & dst,const Vector4f & edge0,const Vector4f & edge1,const Vector4f & x)1121 void ShaderCore::smooth(Vector4f &dst, const Vector4f &edge0, const Vector4f &edge1, const Vector4f &x) 1122 { 1123 Float4 tx = Min(Max((x.x - edge0.x) / (edge1.x - edge0.x), Float4(0.0f)), Float4(1.0f)); dst.x = tx * tx * (Float4(3.0f) - Float4(2.0f) * tx); 1124 Float4 ty = Min(Max((x.y - edge0.y) / (edge1.y - edge0.y), Float4(0.0f)), Float4(1.0f)); dst.y = ty * ty * (Float4(3.0f) - Float4(2.0f) * ty); 1125 Float4 tz = Min(Max((x.z - edge0.z) / (edge1.z - edge0.z), Float4(0.0f)), Float4(1.0f)); dst.z = tz * tz * (Float4(3.0f) - Float4(2.0f) * tz); 1126 Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw); 1127 } 1128 floatToHalfBits(Float4 & dst,const Float4 & floatBits,bool storeInUpperBits)1129 void ShaderCore::floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits) 1130 { 1131 static const uint32_t mask_sign = 0x80000000u; 1132 static const uint32_t mask_round = ~0xfffu; 1133 static const uint32_t c_f32infty = 255 << 23; 1134 static const uint32_t c_magic = 15 << 23; 1135 static const uint32_t c_nanbit = 0x200; 1136 static const uint32_t c_infty_as_fp16 = 0x7c00; 1137 static const uint32_t c_clamp = (31 << 23) - 0x1000; 1138 1139 UInt4 justsign = UInt4(mask_sign) & As<UInt4>(floatBits); 1140 UInt4 absf = As<UInt4>(floatBits) ^ justsign; 1141 UInt4 b_isnormal = CmpNLE(UInt4(c_f32infty), absf); 1142 1143 // Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf 1144 // instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation) 1145 UInt4 joined = ((((As<UInt4>(Min(As<Float4>(absf & UInt4(mask_round)) * As<Float4>(UInt4(c_magic)), 1146 As<Float4>(UInt4(c_clamp))))) - UInt4(mask_round)) >> 13) & b_isnormal) | 1147 ((b_isnormal ^ UInt4(0xFFFFFFFF)) & ((CmpNLE(absf, UInt4(c_f32infty)) & UInt4(c_nanbit)) | 1148 UInt4(c_infty_as_fp16))); 1149 1150 dst = As<Float4>(storeInUpperBits ? As<UInt4>(dst) | ((joined << 16) | justsign) : joined | (justsign >> 16)); 1151 } 1152 halfToFloatBits(Float4 & dst,const Float4 & halfBits)1153 void ShaderCore::halfToFloatBits(Float4& dst, const Float4& halfBits) 1154 { 1155 static const uint32_t mask_nosign = 0x7FFF; 1156 static const uint32_t magic = (254 - 15) << 23; 1157 static const uint32_t was_infnan = 0x7BFF; 1158 static const uint32_t exp_infnan = 255 << 23; 1159 1160 UInt4 expmant = As<UInt4>(halfBits) & UInt4(mask_nosign); 1161 dst = As<Float4>(As<UInt4>(As<Float4>(expmant << 13) * As<Float4>(UInt4(magic))) | 1162 ((As<UInt4>(halfBits) ^ UInt4(expmant)) << 16) | 1163 (CmpNLE(As<UInt4>(expmant), UInt4(was_infnan)) & UInt4(exp_infnan))); 1164 } 1165 packHalf2x16(Vector4f & d,const Vector4f & s0)1166 void ShaderCore::packHalf2x16(Vector4f &d, const Vector4f &s0) 1167 { 1168 // half2 | half1 1169 floatToHalfBits(d.x, s0.x, false); 1170 floatToHalfBits(d.x, s0.y, true); 1171 } 1172 unpackHalf2x16(Vector4f & dst,const Vector4f & s0)1173 void ShaderCore::unpackHalf2x16(Vector4f &dst, const Vector4f &s0) 1174 { 1175 // half2 | half1 1176 halfToFloatBits(dst.x, As<Float4>(As<UInt4>(s0.x) & UInt4(0x0000FFFF))); 1177 halfToFloatBits(dst.y, As<Float4>((As<UInt4>(s0.x) & UInt4(0xFFFF0000)) >> 16)); 1178 } 1179 packSnorm2x16(Vector4f & d,const Vector4f & s0)1180 void ShaderCore::packSnorm2x16(Vector4f &d, const Vector4f &s0) 1181 { 1182 // round(clamp(c, -1.0, 1.0) * 32767.0) 1183 d.x = As<Float4>((Int4(Round(Min(Max(s0.x, Float4(-1.0f)), Float4(1.0f)) * Float4(32767.0f))) & Int4(0xFFFF)) | 1184 ((Int4(Round(Min(Max(s0.y, Float4(-1.0f)), Float4(1.0f)) * Float4(32767.0f))) & Int4(0xFFFF)) << 16)); 1185 } 1186 packUnorm2x16(Vector4f & d,const Vector4f & s0)1187 void ShaderCore::packUnorm2x16(Vector4f &d, const Vector4f &s0) 1188 { 1189 // round(clamp(c, 0.0, 1.0) * 65535.0) 1190 d.x = As<Float4>((Int4(Round(Min(Max(s0.x, Float4(0.0f)), Float4(1.0f)) * Float4(65535.0f))) & Int4(0xFFFF)) | 1191 ((Int4(Round(Min(Max(s0.y, Float4(0.0f)), Float4(1.0f)) * Float4(65535.0f))) & Int4(0xFFFF)) << 16)); 1192 } 1193 unpackSnorm2x16(Vector4f & dst,const Vector4f & s0)1194 void ShaderCore::unpackSnorm2x16(Vector4f &dst, const Vector4f &s0) 1195 { 1196 // clamp(f / 32727.0, -1.0, 1.0) 1197 dst.x = Min(Max(Float4(As<Int4>((As<UInt4>(s0.x) & UInt4(0x0000FFFF)) << 16)) * Float4(1.0f / float(0x7FFF0000)), Float4(-1.0f)), Float4(1.0f)); 1198 dst.y = Min(Max(Float4(As<Int4>(As<UInt4>(s0.x) & UInt4(0xFFFF0000))) * Float4(1.0f / float(0x7FFF0000)), Float4(-1.0f)), Float4(1.0f)); 1199 } 1200 unpackUnorm2x16(Vector4f & dst,const Vector4f & s0)1201 void ShaderCore::unpackUnorm2x16(Vector4f &dst, const Vector4f &s0) 1202 { 1203 // f / 65535.0 1204 dst.x = Float4((As<UInt4>(s0.x) & UInt4(0x0000FFFF)) << 16) * Float4(1.0f / float(0xFFFF0000)); 1205 dst.y = Float4(As<UInt4>(s0.x) & UInt4(0xFFFF0000)) * Float4(1.0f / float(0xFFFF0000)); 1206 } 1207 det2(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1208 void ShaderCore::det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1209 { 1210 dst.x = src0.x * src1.y - src0.y * src1.x; 1211 dst.y = dst.z = dst.w = dst.x; 1212 } 1213 det3(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)1214 void ShaderCore::det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 1215 { 1216 crs(dst, src1, src2); 1217 dp3(dst, dst, src0); 1218 } 1219 det4(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2,const Vector4f & src3)1220 void ShaderCore::det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3) 1221 { 1222 dst.x = src2.z * src3.w - src2.w * src3.z; 1223 dst.y = src1.w * src3.z - src1.z * src3.w; 1224 dst.z = src1.z * src2.w - src1.w * src2.z; 1225 dst.x = src0.x * (src1.y * dst.x + src2.y * dst.y + src3.y * dst.z) - 1226 src0.y * (src1.x * dst.x + src2.x * dst.y + src3.x * dst.z) + 1227 src0.z * (src1.x * (src2.y * src3.w - src2.w * src3.y) + 1228 src2.x * (src1.w * src3.y - src1.y * src3.w) + 1229 src3.x * (src1.y * src2.w - src1.w * src2.y)) + 1230 src0.w * (src1.x * (src2.z * src3.y - src2.y * src3.z) + 1231 src2.x * (src1.y * src3.z - src1.z * src3.y) + 1232 src3.x * (src1.z * src2.y - src1.y * src2.z)); 1233 dst.y = dst.z = dst.w = dst.x; 1234 } 1235 frc(Vector4f & dst,const Vector4f & src)1236 void ShaderCore::frc(Vector4f &dst, const Vector4f &src) 1237 { 1238 dst.x = Frac(src.x); 1239 dst.y = Frac(src.y); 1240 dst.z = Frac(src.z); 1241 dst.w = Frac(src.w); 1242 } 1243 trunc(Vector4f & dst,const Vector4f & src)1244 void ShaderCore::trunc(Vector4f &dst, const Vector4f &src) 1245 { 1246 dst.x = Trunc(src.x); 1247 dst.y = Trunc(src.y); 1248 dst.z = Trunc(src.z); 1249 dst.w = Trunc(src.w); 1250 } 1251 floor(Vector4f & dst,const Vector4f & src)1252 void ShaderCore::floor(Vector4f &dst, const Vector4f &src) 1253 { 1254 dst.x = Floor(src.x); 1255 dst.y = Floor(src.y); 1256 dst.z = Floor(src.z); 1257 dst.w = Floor(src.w); 1258 } 1259 round(Vector4f & dst,const Vector4f & src)1260 void ShaderCore::round(Vector4f &dst, const Vector4f &src) 1261 { 1262 dst.x = Round(src.x); 1263 dst.y = Round(src.y); 1264 dst.z = Round(src.z); 1265 dst.w = Round(src.w); 1266 } 1267 roundEven(Vector4f & dst,const Vector4f & src)1268 void ShaderCore::roundEven(Vector4f &dst, const Vector4f &src) 1269 { 1270 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src)); 1271 // ex.: 1.5: 2 + (0 * 2 - 1) * 1 * 0 = 2 1272 // 2.5: 3 + (0 * 2 - 1) * 1 * 1 = 2 1273 // -1.5: -2 + (1 * 2 - 1) * 1 * 0 = -2 1274 // -2.5: -3 + (1 * 2 - 1) * 1 * 1 = -2 1275 // Even if the round implementation rounds the other way: 1276 // 1.5: 1 + (1 * 2 - 1) * 1 * 1 = 2 1277 // 2.5: 2 + (1 * 2 - 1) * 1 * 0 = 2 1278 // -1.5: -1 + (0 * 2 - 1) * 1 * 1 = -2 1279 // -2.5: -2 + (0 * 2 - 1) * 1 * 0 = -2 1280 round(dst, src); 1281 dst.x += ((Float4(CmpLT(dst.x, src.x) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.x), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.x) & Int4(1)); 1282 dst.y += ((Float4(CmpLT(dst.y, src.y) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.y), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.y) & Int4(1)); 1283 dst.z += ((Float4(CmpLT(dst.z, src.z) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.z), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.z) & Int4(1)); 1284 dst.w += ((Float4(CmpLT(dst.w, src.w) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.w), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.w) & Int4(1)); 1285 } 1286 ceil(Vector4f & dst,const Vector4f & src)1287 void ShaderCore::ceil(Vector4f &dst, const Vector4f &src) 1288 { 1289 dst.x = Ceil(src.x); 1290 dst.y = Ceil(src.y); 1291 dst.z = Ceil(src.z); 1292 dst.w = Ceil(src.w); 1293 } 1294 powx(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,bool pp)1295 void ShaderCore::powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 1296 { 1297 Float4 pow = power(src0.x, src1.x, pp); 1298 1299 dst.x = pow; 1300 dst.y = pow; 1301 dst.z = pow; 1302 dst.w = pow; 1303 } 1304 pow(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,bool pp)1305 void ShaderCore::pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 1306 { 1307 dst.x = power(src0.x, src1.x, pp); 1308 dst.y = power(src0.y, src1.y, pp); 1309 dst.z = power(src0.z, src1.z, pp); 1310 dst.w = power(src0.w, src1.w, pp); 1311 } 1312 crs(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1313 void ShaderCore::crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1314 { 1315 dst.x = src0.y * src1.z - src0.z * src1.y; 1316 dst.y = src0.z * src1.x - src0.x * src1.z; 1317 dst.z = src0.x * src1.y - src0.y * src1.x; 1318 } 1319 forward1(Vector4f & dst,const Vector4f & N,const Vector4f & I,const Vector4f & Nref)1320 void ShaderCore::forward1(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref) 1321 { 1322 Int4 flip = CmpNLT(Nref.x * I.x, Float4(0.0f)) & Int4(0x80000000); 1323 1324 dst.x = As<Float4>(flip ^ As<Int4>(N.x)); 1325 } 1326 forward2(Vector4f & dst,const Vector4f & N,const Vector4f & I,const Vector4f & Nref)1327 void ShaderCore::forward2(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref) 1328 { 1329 Int4 flip = CmpNLT(dot2(Nref, I), Float4(0.0f)) & Int4(0x80000000); 1330 1331 dst.x = As<Float4>(flip ^ As<Int4>(N.x)); 1332 dst.y = As<Float4>(flip ^ As<Int4>(N.y)); 1333 } 1334 forward3(Vector4f & dst,const Vector4f & N,const Vector4f & I,const Vector4f & Nref)1335 void ShaderCore::forward3(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref) 1336 { 1337 Int4 flip = CmpNLT(dot3(Nref, I), Float4(0.0f)) & Int4(0x80000000); 1338 1339 dst.x = As<Float4>(flip ^ As<Int4>(N.x)); 1340 dst.y = As<Float4>(flip ^ As<Int4>(N.y)); 1341 dst.z = As<Float4>(flip ^ As<Int4>(N.z)); 1342 } 1343 forward4(Vector4f & dst,const Vector4f & N,const Vector4f & I,const Vector4f & Nref)1344 void ShaderCore::forward4(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref) 1345 { 1346 Int4 flip = CmpNLT(dot4(Nref, I), Float4(0.0f)) & Int4(0x80000000); 1347 1348 dst.x = As<Float4>(flip ^ As<Int4>(N.x)); 1349 dst.y = As<Float4>(flip ^ As<Int4>(N.y)); 1350 dst.z = As<Float4>(flip ^ As<Int4>(N.z)); 1351 dst.w = As<Float4>(flip ^ As<Int4>(N.w)); 1352 } 1353 reflect1(Vector4f & dst,const Vector4f & I,const Vector4f & N)1354 void ShaderCore::reflect1(Vector4f &dst, const Vector4f &I, const Vector4f &N) 1355 { 1356 Float4 d = N.x * I.x; 1357 1358 dst.x = I.x - Float4(2.0f) * d * N.x; 1359 } 1360 reflect2(Vector4f & dst,const Vector4f & I,const Vector4f & N)1361 void ShaderCore::reflect2(Vector4f &dst, const Vector4f &I, const Vector4f &N) 1362 { 1363 Float4 d = dot2(N, I); 1364 1365 dst.x = I.x - Float4(2.0f) * d * N.x; 1366 dst.y = I.y - Float4(2.0f) * d * N.y; 1367 } 1368 reflect3(Vector4f & dst,const Vector4f & I,const Vector4f & N)1369 void ShaderCore::reflect3(Vector4f &dst, const Vector4f &I, const Vector4f &N) 1370 { 1371 Float4 d = dot3(N, I); 1372 1373 dst.x = I.x - Float4(2.0f) * d * N.x; 1374 dst.y = I.y - Float4(2.0f) * d * N.y; 1375 dst.z = I.z - Float4(2.0f) * d * N.z; 1376 } 1377 reflect4(Vector4f & dst,const Vector4f & I,const Vector4f & N)1378 void ShaderCore::reflect4(Vector4f &dst, const Vector4f &I, const Vector4f &N) 1379 { 1380 Float4 d = dot4(N, I); 1381 1382 dst.x = I.x - Float4(2.0f) * d * N.x; 1383 dst.y = I.y - Float4(2.0f) * d * N.y; 1384 dst.z = I.z - Float4(2.0f) * d * N.z; 1385 dst.w = I.w - Float4(2.0f) * d * N.w; 1386 } 1387 refract1(Vector4f & dst,const Vector4f & I,const Vector4f & N,const Float4 & eta)1388 void ShaderCore::refract1(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta) 1389 { 1390 Float4 d = N.x * I.x; 1391 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); 1392 Int4 pos = CmpNLT(k, Float4(0.0f)); 1393 Float4 t = (eta * d + Sqrt(k)); 1394 1395 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); 1396 } 1397 refract2(Vector4f & dst,const Vector4f & I,const Vector4f & N,const Float4 & eta)1398 void ShaderCore::refract2(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta) 1399 { 1400 Float4 d = dot2(N, I); 1401 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); 1402 Int4 pos = CmpNLT(k, Float4(0.0f)); 1403 Float4 t = (eta * d + Sqrt(k)); 1404 1405 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); 1406 dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y)); 1407 } 1408 refract3(Vector4f & dst,const Vector4f & I,const Vector4f & N,const Float4 & eta)1409 void ShaderCore::refract3(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta) 1410 { 1411 Float4 d = dot3(N, I); 1412 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); 1413 Int4 pos = CmpNLT(k, Float4(0.0f)); 1414 Float4 t = (eta * d + Sqrt(k)); 1415 1416 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); 1417 dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y)); 1418 dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z)); 1419 } 1420 refract4(Vector4f & dst,const Vector4f & I,const Vector4f & N,const Float4 & eta)1421 void ShaderCore::refract4(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta) 1422 { 1423 Float4 d = dot4(N, I); 1424 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); 1425 Int4 pos = CmpNLT(k, Float4(0.0f)); 1426 Float4 t = (eta * d + Sqrt(k)); 1427 1428 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); 1429 dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y)); 1430 dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z)); 1431 dst.w = As<Float4>(pos & As<Int4>(eta * I.w - t * N.w)); 1432 } 1433 sgn(Vector4f & dst,const Vector4f & src)1434 void ShaderCore::sgn(Vector4f &dst, const Vector4f &src) 1435 { 1436 sgn(dst.x, src.x); 1437 sgn(dst.y, src.y); 1438 sgn(dst.z, src.z); 1439 sgn(dst.w, src.w); 1440 } 1441 isgn(Vector4f & dst,const Vector4f & src)1442 void ShaderCore::isgn(Vector4f &dst, const Vector4f &src) 1443 { 1444 isgn(dst.x, src.x); 1445 isgn(dst.y, src.y); 1446 isgn(dst.z, src.z); 1447 isgn(dst.w, src.w); 1448 } 1449 abs(Vector4f & dst,const Vector4f & src)1450 void ShaderCore::abs(Vector4f &dst, const Vector4f &src) 1451 { 1452 dst.x = Abs(src.x); 1453 dst.y = Abs(src.y); 1454 dst.z = Abs(src.z); 1455 dst.w = Abs(src.w); 1456 } 1457 iabs(Vector4f & dst,const Vector4f & src)1458 void ShaderCore::iabs(Vector4f &dst, const Vector4f &src) 1459 { 1460 dst.x = As<Float4>(Abs(As<Int4>(src.x))); 1461 dst.y = As<Float4>(Abs(As<Int4>(src.y))); 1462 dst.z = As<Float4>(Abs(As<Int4>(src.z))); 1463 dst.w = As<Float4>(Abs(As<Int4>(src.w))); 1464 } 1465 nrm2(Vector4f & dst,const Vector4f & src,bool pp)1466 void ShaderCore::nrm2(Vector4f &dst, const Vector4f &src, bool pp) 1467 { 1468 Float4 dot = dot2(src, src); 1469 Float4 rsq = reciprocalSquareRoot(dot, false, pp); 1470 1471 dst.x = src.x * rsq; 1472 dst.y = src.y * rsq; 1473 dst.z = src.z * rsq; 1474 dst.w = src.w * rsq; 1475 } 1476 nrm3(Vector4f & dst,const Vector4f & src,bool pp)1477 void ShaderCore::nrm3(Vector4f &dst, const Vector4f &src, bool pp) 1478 { 1479 Float4 dot = dot3(src, src); 1480 Float4 rsq = reciprocalSquareRoot(dot, false, pp); 1481 1482 dst.x = src.x * rsq; 1483 dst.y = src.y * rsq; 1484 dst.z = src.z * rsq; 1485 dst.w = src.w * rsq; 1486 } 1487 nrm4(Vector4f & dst,const Vector4f & src,bool pp)1488 void ShaderCore::nrm4(Vector4f &dst, const Vector4f &src, bool pp) 1489 { 1490 Float4 dot = dot4(src, src); 1491 Float4 rsq = reciprocalSquareRoot(dot, false, pp); 1492 1493 dst.x = src.x * rsq; 1494 dst.y = src.y * rsq; 1495 dst.z = src.z * rsq; 1496 dst.w = src.w * rsq; 1497 } 1498 sincos(Vector4f & dst,const Vector4f & src,bool pp)1499 void ShaderCore::sincos(Vector4f &dst, const Vector4f &src, bool pp) 1500 { 1501 dst.x = cosine_pi(src.x, pp); 1502 dst.y = sine_pi(src.x, pp); 1503 } 1504 cos(Vector4f & dst,const Vector4f & src,bool pp)1505 void ShaderCore::cos(Vector4f &dst, const Vector4f &src, bool pp) 1506 { 1507 dst.x = cosine(src.x, pp); 1508 dst.y = cosine(src.y, pp); 1509 dst.z = cosine(src.z, pp); 1510 dst.w = cosine(src.w, pp); 1511 } 1512 sin(Vector4f & dst,const Vector4f & src,bool pp)1513 void ShaderCore::sin(Vector4f &dst, const Vector4f &src, bool pp) 1514 { 1515 dst.x = sine(src.x, pp); 1516 dst.y = sine(src.y, pp); 1517 dst.z = sine(src.z, pp); 1518 dst.w = sine(src.w, pp); 1519 } 1520 tan(Vector4f & dst,const Vector4f & src,bool pp)1521 void ShaderCore::tan(Vector4f &dst, const Vector4f &src, bool pp) 1522 { 1523 dst.x = tangent(src.x, pp); 1524 dst.y = tangent(src.y, pp); 1525 dst.z = tangent(src.z, pp); 1526 dst.w = tangent(src.w, pp); 1527 } 1528 acos(Vector4f & dst,const Vector4f & src,bool pp)1529 void ShaderCore::acos(Vector4f &dst, const Vector4f &src, bool pp) 1530 { 1531 dst.x = arccos(src.x, pp); 1532 dst.y = arccos(src.y, pp); 1533 dst.z = arccos(src.z, pp); 1534 dst.w = arccos(src.w, pp); 1535 } 1536 asin(Vector4f & dst,const Vector4f & src,bool pp)1537 void ShaderCore::asin(Vector4f &dst, const Vector4f &src, bool pp) 1538 { 1539 dst.x = arcsin(src.x, pp); 1540 dst.y = arcsin(src.y, pp); 1541 dst.z = arcsin(src.z, pp); 1542 dst.w = arcsin(src.w, pp); 1543 } 1544 atan(Vector4f & dst,const Vector4f & src,bool pp)1545 void ShaderCore::atan(Vector4f &dst, const Vector4f &src, bool pp) 1546 { 1547 dst.x = arctan(src.x, pp); 1548 dst.y = arctan(src.y, pp); 1549 dst.z = arctan(src.z, pp); 1550 dst.w = arctan(src.w, pp); 1551 } 1552 atan2(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,bool pp)1553 void ShaderCore::atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 1554 { 1555 dst.x = arctan(src0.x, src1.x, pp); 1556 dst.y = arctan(src0.y, src1.y, pp); 1557 dst.z = arctan(src0.z, src1.z, pp); 1558 dst.w = arctan(src0.w, src1.w, pp); 1559 } 1560 cosh(Vector4f & dst,const Vector4f & src,bool pp)1561 void ShaderCore::cosh(Vector4f &dst, const Vector4f &src, bool pp) 1562 { 1563 dst.x = cosineh(src.x, pp); 1564 dst.y = cosineh(src.y, pp); 1565 dst.z = cosineh(src.z, pp); 1566 dst.w = cosineh(src.w, pp); 1567 } 1568 sinh(Vector4f & dst,const Vector4f & src,bool pp)1569 void ShaderCore::sinh(Vector4f &dst, const Vector4f &src, bool pp) 1570 { 1571 dst.x = sineh(src.x, pp); 1572 dst.y = sineh(src.y, pp); 1573 dst.z = sineh(src.z, pp); 1574 dst.w = sineh(src.w, pp); 1575 } 1576 tanh(Vector4f & dst,const Vector4f & src,bool pp)1577 void ShaderCore::tanh(Vector4f &dst, const Vector4f &src, bool pp) 1578 { 1579 dst.x = tangenth(src.x, pp); 1580 dst.y = tangenth(src.y, pp); 1581 dst.z = tangenth(src.z, pp); 1582 dst.w = tangenth(src.w, pp); 1583 } 1584 acosh(Vector4f & dst,const Vector4f & src,bool pp)1585 void ShaderCore::acosh(Vector4f &dst, const Vector4f &src, bool pp) 1586 { 1587 dst.x = arccosh(src.x, pp); 1588 dst.y = arccosh(src.y, pp); 1589 dst.z = arccosh(src.z, pp); 1590 dst.w = arccosh(src.w, pp); 1591 } 1592 asinh(Vector4f & dst,const Vector4f & src,bool pp)1593 void ShaderCore::asinh(Vector4f &dst, const Vector4f &src, bool pp) 1594 { 1595 dst.x = arcsinh(src.x, pp); 1596 dst.y = arcsinh(src.y, pp); 1597 dst.z = arcsinh(src.z, pp); 1598 dst.w = arcsinh(src.w, pp); 1599 } 1600 atanh(Vector4f & dst,const Vector4f & src,bool pp)1601 void ShaderCore::atanh(Vector4f &dst, const Vector4f &src, bool pp) 1602 { 1603 dst.x = arctanh(src.x, pp); 1604 dst.y = arctanh(src.y, pp); 1605 dst.z = arctanh(src.z, pp); 1606 dst.w = arctanh(src.w, pp); 1607 } 1608 expp(Vector4f & dst,const Vector4f & src,unsigned short version)1609 void ShaderCore::expp(Vector4f &dst, const Vector4f &src, unsigned short version) 1610 { 1611 if(version < 0x0200) 1612 { 1613 Float4 frc = Frac(src.x); 1614 Float4 floor = src.x - frc; 1615 1616 dst.x = exponential2(floor, true); 1617 dst.y = frc; 1618 dst.z = exponential2(src.x, true); 1619 dst.w = Float4(1.0f); 1620 } 1621 else // Version >= 2.0 1622 { 1623 exp2x(dst, src, true); // FIXME: 10-bit precision suffices 1624 } 1625 } 1626 logp(Vector4f & dst,const Vector4f & src,unsigned short version)1627 void ShaderCore::logp(Vector4f &dst, const Vector4f &src, unsigned short version) 1628 { 1629 if(version < 0x0200) 1630 { 1631 Float4 tmp0; 1632 Float4 tmp1; 1633 Float4 t; 1634 Int4 r; 1635 1636 tmp0 = Abs(src.x); 1637 tmp1 = tmp0; 1638 1639 // X component 1640 r = As<Int4>(As<UInt4>(tmp0) >> 23) - Int4(127); 1641 dst.x = Float4(r); 1642 1643 // Y component 1644 dst.y = As<Float4>((As<Int4>(tmp1) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f))); 1645 1646 // Z component 1647 dst.z = logarithm2(src.x, true, true); 1648 1649 // W component 1650 dst.w = 1.0f; 1651 } 1652 else 1653 { 1654 log2x(dst, src, true); 1655 } 1656 } 1657 cmp0(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)1658 void ShaderCore::cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 1659 { 1660 cmp0(dst.x, src0.x, src1.x, src2.x); 1661 cmp0(dst.y, src0.y, src1.y, src2.y); 1662 cmp0(dst.z, src0.z, src1.z, src2.z); 1663 cmp0(dst.w, src0.w, src1.w, src2.w); 1664 } 1665 select(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)1666 void ShaderCore::select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 1667 { 1668 select(dst.x, As<Int4>(src0.x), src1.x, src2.x); 1669 select(dst.y, As<Int4>(src0.y), src1.y, src2.y); 1670 select(dst.z, As<Int4>(src0.z), src1.z, src2.z); 1671 select(dst.w, As<Int4>(src0.w), src1.w, src2.w); 1672 } 1673 extract(Float4 & dst,const Vector4f & src0,const Float4 & src1)1674 void ShaderCore::extract(Float4 &dst, const Vector4f &src0, const Float4 &src1) 1675 { 1676 select(dst, CmpEQ(As<Int4>(src1), Int4(1)), src0.y, src0.x); 1677 select(dst, CmpEQ(As<Int4>(src1), Int4(2)), src0.z, dst); 1678 select(dst, CmpEQ(As<Int4>(src1), Int4(3)), src0.w, dst); 1679 } 1680 insert(Vector4f & dst,const Vector4f & src,const Float4 & element,const Float4 & index)1681 void ShaderCore::insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index) 1682 { 1683 select(dst.x, CmpEQ(As<Int4>(index), Int4(0)), element, src.x); 1684 select(dst.y, CmpEQ(As<Int4>(index), Int4(1)), element, src.y); 1685 select(dst.z, CmpEQ(As<Int4>(index), Int4(2)), element, src.z); 1686 select(dst.w, CmpEQ(As<Int4>(index), Int4(3)), element, src.w); 1687 } 1688 sgn(Float4 & dst,const Float4 & src)1689 void ShaderCore::sgn(Float4 &dst, const Float4 &src) 1690 { 1691 Int4 neg = As<Int4>(CmpLT(src, Float4(-0.0f))) & As<Int4>(Float4(-1.0f)); 1692 Int4 pos = As<Int4>(CmpNLE(src, Float4(+0.0f))) & As<Int4>(Float4(1.0f)); 1693 dst = As<Float4>(neg | pos); 1694 } 1695 isgn(Float4 & dst,const Float4 & src)1696 void ShaderCore::isgn(Float4 &dst, const Float4 &src) 1697 { 1698 Int4 neg = CmpLT(As<Int4>(src), Int4(0)) & Int4(-1); 1699 Int4 pos = CmpNLE(As<Int4>(src), Int4(0)) & Int4(1); 1700 dst = As<Float4>(neg | pos); 1701 } 1702 cmp0(Float4 & dst,const Float4 & src0,const Float4 & src1,const Float4 & src2)1703 void ShaderCore::cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2) 1704 { 1705 Int4 pos = CmpLE(Float4(0.0f), src0); 1706 select(dst, pos, src1, src2); 1707 } 1708 cmp0i(Float4 & dst,const Float4 & src0,const Float4 & src1,const Float4 & src2)1709 void ShaderCore::cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2) 1710 { 1711 Int4 pos = CmpEQ(Int4(0), As<Int4>(src0)); 1712 select(dst, pos, src1, src2); 1713 } 1714 select(Float4 & dst,RValue<Int4> src0,const Float4 & src1,const Float4 & src2)1715 void ShaderCore::select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2) 1716 { 1717 // FIXME: LLVM vector select 1718 dst = As<Float4>(src0 & As<Int4>(src1) | ~src0 & As<Int4>(src2)); 1719 } 1720 cmp(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,Control control)1721 void ShaderCore::cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control) 1722 { 1723 switch(control) 1724 { 1725 case Shader::CONTROL_GT: 1726 dst.x = As<Float4>(CmpNLE(src0.x, src1.x)); 1727 dst.y = As<Float4>(CmpNLE(src0.y, src1.y)); 1728 dst.z = As<Float4>(CmpNLE(src0.z, src1.z)); 1729 dst.w = As<Float4>(CmpNLE(src0.w, src1.w)); 1730 break; 1731 case Shader::CONTROL_EQ: 1732 dst.x = As<Float4>(CmpEQ(src0.x, src1.x)); 1733 dst.y = As<Float4>(CmpEQ(src0.y, src1.y)); 1734 dst.z = As<Float4>(CmpEQ(src0.z, src1.z)); 1735 dst.w = As<Float4>(CmpEQ(src0.w, src1.w)); 1736 break; 1737 case Shader::CONTROL_GE: 1738 dst.x = As<Float4>(CmpNLT(src0.x, src1.x)); 1739 dst.y = As<Float4>(CmpNLT(src0.y, src1.y)); 1740 dst.z = As<Float4>(CmpNLT(src0.z, src1.z)); 1741 dst.w = As<Float4>(CmpNLT(src0.w, src1.w)); 1742 break; 1743 case Shader::CONTROL_LT: 1744 dst.x = As<Float4>(CmpLT(src0.x, src1.x)); 1745 dst.y = As<Float4>(CmpLT(src0.y, src1.y)); 1746 dst.z = As<Float4>(CmpLT(src0.z, src1.z)); 1747 dst.w = As<Float4>(CmpLT(src0.w, src1.w)); 1748 break; 1749 case Shader::CONTROL_NE: 1750 dst.x = As<Float4>(CmpNEQ(src0.x, src1.x)); 1751 dst.y = As<Float4>(CmpNEQ(src0.y, src1.y)); 1752 dst.z = As<Float4>(CmpNEQ(src0.z, src1.z)); 1753 dst.w = As<Float4>(CmpNEQ(src0.w, src1.w)); 1754 break; 1755 case Shader::CONTROL_LE: 1756 dst.x = As<Float4>(CmpLE(src0.x, src1.x)); 1757 dst.y = As<Float4>(CmpLE(src0.y, src1.y)); 1758 dst.z = As<Float4>(CmpLE(src0.z, src1.z)); 1759 dst.w = As<Float4>(CmpLE(src0.w, src1.w)); 1760 break; 1761 default: 1762 ASSERT(false); 1763 } 1764 } 1765 icmp(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,Control control)1766 void ShaderCore::icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control) 1767 { 1768 switch(control) 1769 { 1770 case Shader::CONTROL_GT: 1771 dst.x = As<Float4>(CmpNLE(As<Int4>(src0.x), As<Int4>(src1.x))); 1772 dst.y = As<Float4>(CmpNLE(As<Int4>(src0.y), As<Int4>(src1.y))); 1773 dst.z = As<Float4>(CmpNLE(As<Int4>(src0.z), As<Int4>(src1.z))); 1774 dst.w = As<Float4>(CmpNLE(As<Int4>(src0.w), As<Int4>(src1.w))); 1775 break; 1776 case Shader::CONTROL_EQ: 1777 dst.x = As<Float4>(CmpEQ(As<Int4>(src0.x), As<Int4>(src1.x))); 1778 dst.y = As<Float4>(CmpEQ(As<Int4>(src0.y), As<Int4>(src1.y))); 1779 dst.z = As<Float4>(CmpEQ(As<Int4>(src0.z), As<Int4>(src1.z))); 1780 dst.w = As<Float4>(CmpEQ(As<Int4>(src0.w), As<Int4>(src1.w))); 1781 break; 1782 case Shader::CONTROL_GE: 1783 dst.x = As<Float4>(CmpNLT(As<Int4>(src0.x), As<Int4>(src1.x))); 1784 dst.y = As<Float4>(CmpNLT(As<Int4>(src0.y), As<Int4>(src1.y))); 1785 dst.z = As<Float4>(CmpNLT(As<Int4>(src0.z), As<Int4>(src1.z))); 1786 dst.w = As<Float4>(CmpNLT(As<Int4>(src0.w), As<Int4>(src1.w))); 1787 break; 1788 case Shader::CONTROL_LT: 1789 dst.x = As<Float4>(CmpLT(As<Int4>(src0.x), As<Int4>(src1.x))); 1790 dst.y = As<Float4>(CmpLT(As<Int4>(src0.y), As<Int4>(src1.y))); 1791 dst.z = As<Float4>(CmpLT(As<Int4>(src0.z), As<Int4>(src1.z))); 1792 dst.w = As<Float4>(CmpLT(As<Int4>(src0.w), As<Int4>(src1.w))); 1793 break; 1794 case Shader::CONTROL_NE: 1795 dst.x = As<Float4>(CmpNEQ(As<Int4>(src0.x), As<Int4>(src1.x))); 1796 dst.y = As<Float4>(CmpNEQ(As<Int4>(src0.y), As<Int4>(src1.y))); 1797 dst.z = As<Float4>(CmpNEQ(As<Int4>(src0.z), As<Int4>(src1.z))); 1798 dst.w = As<Float4>(CmpNEQ(As<Int4>(src0.w), As<Int4>(src1.w))); 1799 break; 1800 case Shader::CONTROL_LE: 1801 dst.x = As<Float4>(CmpLE(As<Int4>(src0.x), As<Int4>(src1.x))); 1802 dst.y = As<Float4>(CmpLE(As<Int4>(src0.y), As<Int4>(src1.y))); 1803 dst.z = As<Float4>(CmpLE(As<Int4>(src0.z), As<Int4>(src1.z))); 1804 dst.w = As<Float4>(CmpLE(As<Int4>(src0.w), As<Int4>(src1.w))); 1805 break; 1806 default: 1807 ASSERT(false); 1808 } 1809 } 1810 ucmp(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,Control control)1811 void ShaderCore::ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control) 1812 { 1813 switch(control) 1814 { 1815 case Shader::CONTROL_GT: 1816 dst.x = As<Float4>(CmpNLE(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1817 dst.y = As<Float4>(CmpNLE(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1818 dst.z = As<Float4>(CmpNLE(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1819 dst.w = As<Float4>(CmpNLE(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1820 break; 1821 case Shader::CONTROL_EQ: 1822 dst.x = As<Float4>(CmpEQ(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1823 dst.y = As<Float4>(CmpEQ(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1824 dst.z = As<Float4>(CmpEQ(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1825 dst.w = As<Float4>(CmpEQ(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1826 break; 1827 case Shader::CONTROL_GE: 1828 dst.x = As<Float4>(CmpNLT(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1829 dst.y = As<Float4>(CmpNLT(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1830 dst.z = As<Float4>(CmpNLT(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1831 dst.w = As<Float4>(CmpNLT(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1832 break; 1833 case Shader::CONTROL_LT: 1834 dst.x = As<Float4>(CmpLT(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1835 dst.y = As<Float4>(CmpLT(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1836 dst.z = As<Float4>(CmpLT(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1837 dst.w = As<Float4>(CmpLT(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1838 break; 1839 case Shader::CONTROL_NE: 1840 dst.x = As<Float4>(CmpNEQ(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1841 dst.y = As<Float4>(CmpNEQ(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1842 dst.z = As<Float4>(CmpNEQ(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1843 dst.w = As<Float4>(CmpNEQ(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1844 break; 1845 case Shader::CONTROL_LE: 1846 dst.x = As<Float4>(CmpLE(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1847 dst.y = As<Float4>(CmpLE(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1848 dst.z = As<Float4>(CmpLE(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1849 dst.w = As<Float4>(CmpLE(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1850 break; 1851 default: 1852 ASSERT(false); 1853 } 1854 } 1855 all(Float4 & dst,const Vector4f & src)1856 void ShaderCore::all(Float4 &dst, const Vector4f &src) 1857 { 1858 dst = As<Float4>(As<Int4>(src.x) & As<Int4>(src.y) & As<Int4>(src.z) & As<Int4>(src.w)); 1859 } 1860 any(Float4 & dst,const Vector4f & src)1861 void ShaderCore::any(Float4 &dst, const Vector4f &src) 1862 { 1863 dst = As<Float4>(As<Int4>(src.x) | As<Int4>(src.y) | As<Int4>(src.z) | As<Int4>(src.w)); 1864 } 1865 not(Vector4f & dst,const Vector4f & src)1866 void ShaderCore::not(Vector4f &dst, const Vector4f &src) 1867 { 1868 dst.x = As<Float4>(As<Int4>(src.x) ^ Int4(0xFFFFFFFF)); 1869 dst.y = As<Float4>(As<Int4>(src.y) ^ Int4(0xFFFFFFFF)); 1870 dst.z = As<Float4>(As<Int4>(src.z) ^ Int4(0xFFFFFFFF)); 1871 dst.w = As<Float4>(As<Int4>(src.w) ^ Int4(0xFFFFFFFF)); 1872 } 1873 or(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1874 void ShaderCore::or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1875 { 1876 dst.x = As<Float4>(As<Int4>(src0.x) | As<Int4>(src1.x)); 1877 dst.y = As<Float4>(As<Int4>(src0.y) | As<Int4>(src1.y)); 1878 dst.z = As<Float4>(As<Int4>(src0.z) | As<Int4>(src1.z)); 1879 dst.w = As<Float4>(As<Int4>(src0.w) | As<Int4>(src1.w)); 1880 } 1881 xor(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1882 void ShaderCore::xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1883 { 1884 dst.x = As<Float4>(As<Int4>(src0.x) ^ As<Int4>(src1.x)); 1885 dst.y = As<Float4>(As<Int4>(src0.y) ^ As<Int4>(src1.y)); 1886 dst.z = As<Float4>(As<Int4>(src0.z) ^ As<Int4>(src1.z)); 1887 dst.w = As<Float4>(As<Int4>(src0.w) ^ As<Int4>(src1.w)); 1888 } 1889 and(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1890 void ShaderCore::and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1891 { 1892 dst.x = As<Float4>(As<Int4>(src0.x) & As<Int4>(src1.x)); 1893 dst.y = As<Float4>(As<Int4>(src0.y) & As<Int4>(src1.y)); 1894 dst.z = As<Float4>(As<Int4>(src0.z) & As<Int4>(src1.z)); 1895 dst.w = As<Float4>(As<Int4>(src0.w) & As<Int4>(src1.w)); 1896 } 1897 equal(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1898 void ShaderCore::equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1899 { 1900 dst.x = As<Float4>(CmpEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) & 1901 CmpEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) & 1902 CmpEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) & 1903 CmpEQ(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1904 dst.y = dst.x; 1905 dst.z = dst.x; 1906 dst.w = dst.x; 1907 } 1908 notEqual(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1909 void ShaderCore::notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1910 { 1911 dst.x = As<Float4>(CmpNEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) | 1912 CmpNEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) | 1913 CmpNEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) | 1914 CmpNEQ(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1915 dst.y = dst.x; 1916 dst.z = dst.x; 1917 dst.w = dst.x; 1918 } 1919 } 1920