1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelRoutine.hpp" 16 17 #include "SamplerCore.hpp" 18 #include "Constants.hpp" 19 #include "Renderer/Renderer.hpp" 20 #include "Renderer/QuadRasterizer.hpp" 21 #include "Renderer/Surface.hpp" 22 #include "Renderer/Primitive.hpp" 23 #include "Common/Debug.hpp" 24 25 namespace sw 26 { 27 extern bool complementaryDepthBuffer; 28 extern bool postBlendSRGB; 29 extern bool exactColorRounding; 30 extern bool forceClearRegisters; 31 PixelRoutine(const PixelProcessor::State & state,const PixelShader * shader)32 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput) 33 { 34 if(!shader || shader->getShaderModel() < 0x0200 || forceClearRegisters) 35 { 36 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++) 37 { 38 v[i].x = Float4(0.0f); 39 v[i].y = Float4(0.0f); 40 v[i].z = Float4(0.0f); 41 v[i].w = Float4(0.0f); 42 } 43 } 44 } 45 ~PixelRoutine()46 PixelRoutine::~PixelRoutine() 47 { 48 } 49 quad(Pointer<Byte> cBuffer[RENDERTARGETS],Pointer<Byte> & zBuffer,Pointer<Byte> & sBuffer,Int cMask[4],Int & x,Int & y)50 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) 51 { 52 #if PERF_PROFILE 53 Long pipeTime = Ticks(); 54 #endif 55 56 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive(); 57 58 Int zMask[4]; // Depth mask 59 Int sMask[4]; // Stencil mask 60 61 for(unsigned int q = 0; q < state.multiSample; q++) 62 { 63 zMask[q] = cMask[q]; 64 sMask[q] = cMask[q]; 65 } 66 67 for(unsigned int q = 0; q < state.multiSample; q++) 68 { 69 stencilTest(sBuffer, q, x, sMask[q], cMask[q]); 70 } 71 72 Float4 f; 73 Float4 rhwCentroid; 74 75 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16); 76 77 if(interpolateZ()) 78 { 79 for(unsigned int q = 0; q < state.multiSample; q++) 80 { 81 Float4 x = xxxx; 82 83 if(state.multiSample > 1) 84 { 85 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4)); 86 } 87 88 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp); 89 } 90 } 91 92 Bool depthPass = false; 93 94 if(earlyDepthTest) 95 { 96 for(unsigned int q = 0; q < state.multiSample; q++) 97 { 98 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); 99 } 100 } 101 102 If(depthPass || Bool(!earlyDepthTest)) 103 { 104 #if PERF_PROFILE 105 Long interpTime = Ticks(); 106 #endif 107 108 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16); 109 110 // Centroid locations 111 Float4 XXXX = Float4(0.0f); 112 Float4 YYYY = Float4(0.0f); 113 114 if(state.centroid) 115 { 116 Float4 WWWW(1.0e-9f); 117 118 for(unsigned int q = 0; q < state.multiSample; q++) 119 { 120 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]); 121 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]); 122 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]); 123 } 124 125 WWWW = Rcp_pp(WWWW); 126 XXXX *= WWWW; 127 YYYY *= WWWW; 128 129 XXXX += xxxx; 130 YYYY += yyyy; 131 } 132 133 if(interpolateW()) 134 { 135 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false); 136 rhw = reciprocal(w, false, false, true); 137 138 if(state.centroid) 139 { 140 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false)); 141 } 142 } 143 144 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) 145 { 146 for(int component = 0; component < 4; component++) 147 { 148 if(state.interpolant[interpolant].component & (1 << component)) 149 { 150 if(!state.interpolant[interpolant].centroid) 151 { 152 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective, false); 153 } 154 else 155 { 156 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); 157 } 158 } 159 } 160 161 Float4 rcp; 162 163 switch(state.interpolant[interpolant].project) 164 { 165 case 0: 166 break; 167 case 1: 168 rcp = reciprocal(v[interpolant].y); 169 v[interpolant].x = v[interpolant].x * rcp; 170 break; 171 case 2: 172 rcp = reciprocal(v[interpolant].z); 173 v[interpolant].x = v[interpolant].x * rcp; 174 v[interpolant].y = v[interpolant].y * rcp; 175 break; 176 case 3: 177 rcp = reciprocal(v[interpolant].w); 178 v[interpolant].x = v[interpolant].x * rcp; 179 v[interpolant].y = v[interpolant].y * rcp; 180 v[interpolant].z = v[interpolant].z * rcp; 181 break; 182 } 183 } 184 185 if(state.fog.component) 186 { 187 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective, false); 188 } 189 190 setBuiltins(x, y, z, w); 191 192 #if PERF_PROFILE 193 cycles[PERF_INTERP] += Ticks() - interpTime; 194 #endif 195 196 Bool alphaPass = true; 197 198 if(colorUsed()) 199 { 200 #if PERF_PROFILE 201 Long shaderTime = Ticks(); 202 #endif 203 204 applyShader(cMask); 205 206 #if PERF_PROFILE 207 cycles[PERF_SHADER] += Ticks() - shaderTime; 208 #endif 209 210 alphaPass = alphaTest(cMask); 211 212 if((shader && shader->containsKill()) || state.alphaTestActive()) 213 { 214 for(unsigned int q = 0; q < state.multiSample; q++) 215 { 216 zMask[q] &= cMask[q]; 217 sMask[q] &= cMask[q]; 218 } 219 } 220 } 221 222 If(alphaPass) 223 { 224 if(!earlyDepthTest) 225 { 226 for(unsigned int q = 0; q < state.multiSample; q++) 227 { 228 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); 229 } 230 } 231 232 #if PERF_PROFILE 233 Long ropTime = Ticks(); 234 #endif 235 236 If(depthPass || Bool(earlyDepthTest)) 237 { 238 for(unsigned int q = 0; q < state.multiSample; q++) 239 { 240 if(state.multiSampleMask & (1 << q)) 241 { 242 writeDepth(zBuffer, q, x, z[q], zMask[q]); 243 244 if(state.occlusionEnabled) 245 { 246 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q])); 247 } 248 } 249 } 250 251 if(colorUsed()) 252 { 253 #if PERF_PROFILE 254 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4); 255 #endif 256 257 rasterOperation(f, cBuffer, x, sMask, zMask, cMask); 258 } 259 } 260 261 #if PERF_PROFILE 262 cycles[PERF_ROP] += Ticks() - ropTime; 263 #endif 264 } 265 } 266 267 for(unsigned int q = 0; q < state.multiSample; q++) 268 { 269 if(state.multiSampleMask & (1 << q)) 270 { 271 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]); 272 } 273 } 274 275 #if PERF_PROFILE 276 cycles[PERF_PIPE] += Ticks() - pipeTime; 277 #endif 278 } 279 interpolateCentroid(Float4 & x,Float4 & y,Float4 & rhw,Pointer<Byte> planeEquation,bool flat,bool perspective)280 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective) 281 { 282 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16); 283 284 if(!flat) 285 { 286 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) + 287 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16); 288 289 if(perspective) 290 { 291 interpolant *= rhw; 292 } 293 } 294 295 return interpolant; 296 } 297 stencilTest(Pointer<Byte> & sBuffer,int q,Int & x,Int & sMask,Int & cMask)298 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask) 299 { 300 if(!state.stencilActive) 301 { 302 return; 303 } 304 305 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask) 306 307 Pointer<Byte> buffer = sBuffer + 2 * x; 308 309 if(q > 0) 310 { 311 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB)); 312 } 313 314 Byte8 value = *Pointer<Byte8>(buffer); 315 Byte8 valueCCW = value; 316 317 if(!state.noStencilMask) 318 { 319 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ)); 320 } 321 322 stencilTest(value, state.stencilCompareMode, false); 323 324 if(state.twoSidedStencil) 325 { 326 if(!state.noStencilMaskCCW) 327 { 328 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ)); 329 } 330 331 stencilTest(valueCCW, state.stencilCompareModeCCW, true); 332 333 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); 334 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); 335 value |= valueCCW; 336 } 337 338 sMask = SignMask(value) & cMask; 339 } 340 stencilTest(Byte8 & value,StencilCompareMode stencilCompareMode,bool CCW)341 void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW) 342 { 343 Byte8 equal; 344 345 switch(stencilCompareMode) 346 { 347 case STENCIL_ALWAYS: 348 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 349 break; 350 case STENCIL_NEVER: 351 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 352 break; 353 case STENCIL_LESS: // a < b ~ b > a 354 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 355 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 356 break; 357 case STENCIL_EQUAL: 358 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 359 break; 360 case STENCIL_NOTEQUAL: // a != b ~ !(a == b) 361 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 362 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 363 break; 364 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b) 365 equal = value; 366 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 367 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 368 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 369 value |= equal; 370 break; 371 case STENCIL_GREATER: // a > b 372 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)); 373 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 374 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value)); 375 value = equal; 376 break; 377 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a) 378 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 379 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 380 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 381 break; 382 default: 383 ASSERT(false); 384 } 385 } 386 depthTest(Pointer<Byte> & zBuffer,int q,Int & x,Float4 & z,Int & sMask,Int & zMask,Int & cMask)387 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask) 388 { 389 if(!state.depthTestActive) 390 { 391 return true; 392 } 393 394 Float4 Z = z; 395 396 if(shader && shader->depthOverride()) 397 { 398 if(complementaryDepthBuffer) 399 { 400 Z = Float4(1.0f) - oDepth; 401 } 402 else 403 { 404 Z = oDepth; 405 } 406 } 407 408 Pointer<Byte> buffer; 409 Int pitch; 410 411 if(!state.quadLayoutDepthBuffer) 412 { 413 buffer = zBuffer + 4 * x; 414 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 415 } 416 else 417 { 418 buffer = zBuffer + 8 * x; 419 } 420 421 if(q > 0) 422 { 423 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); 424 } 425 426 Float4 zValue; 427 428 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) 429 { 430 if(!state.quadLayoutDepthBuffer) 431 { 432 // FIXME: Properly optimizes? 433 zValue.xy = *Pointer<Float4>(buffer); 434 zValue.zw = *Pointer<Float4>(buffer + pitch - 8); 435 } 436 else 437 { 438 zValue = *Pointer<Float4>(buffer, 16); 439 } 440 } 441 442 Int4 zTest; 443 444 switch(state.depthCompareMode) 445 { 446 case DEPTH_ALWAYS: 447 // Optimized 448 break; 449 case DEPTH_NEVER: 450 // Optimized 451 break; 452 case DEPTH_EQUAL: 453 zTest = CmpEQ(zValue, Z); 454 break; 455 case DEPTH_NOTEQUAL: 456 zTest = CmpNEQ(zValue, Z); 457 break; 458 case DEPTH_LESS: 459 if(complementaryDepthBuffer) 460 { 461 zTest = CmpLT(zValue, Z); 462 } 463 else 464 { 465 zTest = CmpNLE(zValue, Z); 466 } 467 break; 468 case DEPTH_GREATEREQUAL: 469 if(complementaryDepthBuffer) 470 { 471 zTest = CmpNLT(zValue, Z); 472 } 473 else 474 { 475 zTest = CmpLE(zValue, Z); 476 } 477 break; 478 case DEPTH_LESSEQUAL: 479 if(complementaryDepthBuffer) 480 { 481 zTest = CmpLE(zValue, Z); 482 } 483 else 484 { 485 zTest = CmpNLT(zValue, Z); 486 } 487 break; 488 case DEPTH_GREATER: 489 if(complementaryDepthBuffer) 490 { 491 zTest = CmpNLE(zValue, Z); 492 } 493 else 494 { 495 zTest = CmpLT(zValue, Z); 496 } 497 break; 498 default: 499 ASSERT(false); 500 } 501 502 switch(state.depthCompareMode) 503 { 504 case DEPTH_ALWAYS: 505 zMask = cMask; 506 break; 507 case DEPTH_NEVER: 508 zMask = 0x0; 509 break; 510 default: 511 zMask = SignMask(zTest) & cMask; 512 break; 513 } 514 515 if(state.stencilActive) 516 { 517 zMask &= sMask; 518 } 519 520 return zMask != 0; 521 } 522 alphaTest(Int & aMask,Short4 & alpha)523 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha) 524 { 525 Short4 cmp; 526 Short4 equal; 527 528 switch(state.alphaCompareMode) 529 { 530 case ALPHA_ALWAYS: 531 aMask = 0xF; 532 break; 533 case ALPHA_NEVER: 534 aMask = 0x0; 535 break; 536 case ALPHA_EQUAL: 537 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 538 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 539 break; 540 case ALPHA_NOTEQUAL: // a != b ~ !(a == b) 541 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME 542 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 543 break; 544 case ALPHA_LESS: // a < b ~ b > a 545 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha); 546 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 547 break; 548 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate 549 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 550 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 551 cmp |= equal; 552 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 553 break; 554 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b) 555 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME 556 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 557 break; 558 case ALPHA_GREATER: // a > b 559 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 560 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 561 break; 562 default: 563 ASSERT(false); 564 } 565 } 566 alphaToCoverage(Int cMask[4],Float4 & alpha)567 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha) 568 { 569 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0))); 570 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1))); 571 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2))); 572 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3))); 573 574 Int aMask0 = SignMask(coverage0); 575 Int aMask1 = SignMask(coverage1); 576 Int aMask2 = SignMask(coverage2); 577 Int aMask3 = SignMask(coverage3); 578 579 cMask[0] &= aMask0; 580 cMask[1] &= aMask1; 581 cMask[2] &= aMask2; 582 cMask[3] &= aMask3; 583 } 584 fogBlend(Vector4f & c0,Float4 & fog)585 void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog) 586 { 587 if(!state.fogActive) 588 { 589 return; 590 } 591 592 if(state.pixelFogMode != FOG_NONE) 593 { 594 pixelFog(fog); 595 596 fog = Min(fog, Float4(1.0f)); 597 fog = Max(fog, Float4(0.0f)); 598 } 599 600 c0.x -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0])); 601 c0.y -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1])); 602 c0.z -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2])); 603 604 c0.x *= fog; 605 c0.y *= fog; 606 c0.z *= fog; 607 608 c0.x += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0])); 609 c0.y += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1])); 610 c0.z += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2])); 611 } 612 pixelFog(Float4 & visibility)613 void PixelRoutine::pixelFog(Float4 &visibility) 614 { 615 Float4 &zw = visibility; 616 617 if(state.pixelFogMode != FOG_NONE) 618 { 619 if(state.wBasedFog) 620 { 621 zw = rhw; 622 } 623 else 624 { 625 if(complementaryDepthBuffer) 626 { 627 zw = Float4(1.0f) - z[0]; 628 } 629 else 630 { 631 zw = z[0]; 632 } 633 } 634 } 635 636 switch(state.pixelFogMode) 637 { 638 case FOG_NONE: 639 break; 640 case FOG_LINEAR: 641 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)); 642 zw += *Pointer<Float4>(data + OFFSET(DrawData,fog.offset)); 643 break; 644 case FOG_EXP: 645 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)); 646 zw = exponential2(zw, true); 647 break; 648 case FOG_EXP2: 649 zw *= zw; 650 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)); 651 zw = exponential2(zw, true); 652 break; 653 default: 654 ASSERT(false); 655 } 656 } 657 writeDepth(Pointer<Byte> & zBuffer,int q,Int & x,Float4 & z,Int & zMask)658 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask) 659 { 660 if(!state.depthWriteEnable) 661 { 662 return; 663 } 664 665 Float4 Z = z; 666 667 if(shader && shader->depthOverride()) 668 { 669 if(complementaryDepthBuffer) 670 { 671 Z = Float4(1.0f) - oDepth; 672 } 673 else 674 { 675 Z = oDepth; 676 } 677 } 678 679 Pointer<Byte> buffer; 680 Int pitch; 681 682 if(!state.quadLayoutDepthBuffer) 683 { 684 buffer = zBuffer + 4 * x; 685 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 686 } 687 else 688 { 689 buffer = zBuffer + 8 * x; 690 } 691 692 if(q > 0) 693 { 694 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); 695 } 696 697 Float4 zValue; 698 699 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) 700 { 701 if(!state.quadLayoutDepthBuffer) 702 { 703 // FIXME: Properly optimizes? 704 zValue.xy = *Pointer<Float4>(buffer); 705 zValue.zw = *Pointer<Float4>(buffer + pitch - 8); 706 } 707 else 708 { 709 zValue = *Pointer<Float4>(buffer, 16); 710 } 711 } 712 713 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16)); 714 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16)); 715 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue)); 716 717 if(!state.quadLayoutDepthBuffer) 718 { 719 // FIXME: Properly optimizes? 720 *Pointer<Float2>(buffer) = Float2(Z.xy); 721 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw); 722 } 723 else 724 { 725 *Pointer<Float4>(buffer, 16) = Z; 726 } 727 } 728 writeStencil(Pointer<Byte> & sBuffer,int q,Int & x,Int & sMask,Int & zMask,Int & cMask)729 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask) 730 { 731 if(!state.stencilActive) 732 { 733 return; 734 } 735 736 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP) 737 { 738 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP)) 739 { 740 return; 741 } 742 } 743 744 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW)) 745 { 746 return; 747 } 748 749 Pointer<Byte> buffer = sBuffer + 2 * x; 750 751 if(q > 0) 752 { 753 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB)); 754 } 755 756 Byte8 bufferValue = *Pointer<Byte8>(buffer); 757 758 Byte8 newValue; 759 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask); 760 761 if(!state.noStencilWriteMask) 762 { 763 Byte8 maskedValue = bufferValue; 764 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ)); 765 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ)); 766 newValue |= maskedValue; 767 } 768 769 if(state.twoSidedStencil) 770 { 771 Byte8 newValueCCW; 772 773 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask); 774 775 if(!state.noStencilWriteMaskCCW) 776 { 777 Byte8 maskedValue = bufferValue; 778 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ)); 779 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ)); 780 newValueCCW |= maskedValue; 781 } 782 783 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); 784 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); 785 newValue |= newValueCCW; 786 } 787 788 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask); 789 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask); 790 newValue |= bufferValue; 791 792 *Pointer<Byte4>(buffer) = Byte4(newValue); 793 } 794 stencilOperation(Byte8 & newValue,Byte8 & bufferValue,StencilOperation stencilPassOperation,StencilOperation stencilZFailOperation,StencilOperation stencilFailOperation,bool CCW,Int & zMask,Int & sMask)795 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask) 796 { 797 Byte8 &pass = newValue; 798 Byte8 fail; 799 Byte8 zFail; 800 801 stencilOperation(pass, bufferValue, stencilPassOperation, CCW); 802 803 if(stencilZFailOperation != stencilPassOperation) 804 { 805 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW); 806 } 807 808 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) 809 { 810 stencilOperation(fail, bufferValue, stencilFailOperation, CCW); 811 } 812 813 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) 814 { 815 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same 816 { 817 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask); 818 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask); 819 pass |= zFail; 820 } 821 822 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask); 823 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask); 824 pass |= fail; 825 } 826 } 827 stencilOperation(Byte8 & output,Byte8 & bufferValue,StencilOperation operation,bool CCW)828 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW) 829 { 830 switch(operation) 831 { 832 case OPERATION_KEEP: 833 output = bufferValue; 834 break; 835 case OPERATION_ZERO: 836 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 837 break; 838 case OPERATION_REPLACE: 839 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ)); 840 break; 841 case OPERATION_INCRSAT: 842 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); 843 break; 844 case OPERATION_DECRSAT: 845 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); 846 break; 847 case OPERATION_INVERT: 848 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 849 break; 850 case OPERATION_INCR: 851 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1); 852 break; 853 case OPERATION_DECR: 854 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1); 855 break; 856 default: 857 ASSERT(false); 858 } 859 } 860 blendFactor(Vector4s & blendFactor,const Vector4s & current,const Vector4s & pixel,BlendFactor blendFactorActive)861 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorActive) 862 { 863 switch(blendFactorActive) 864 { 865 case BLEND_ZERO: 866 // Optimized 867 break; 868 case BLEND_ONE: 869 // Optimized 870 break; 871 case BLEND_SOURCE: 872 blendFactor.x = current.x; 873 blendFactor.y = current.y; 874 blendFactor.z = current.z; 875 break; 876 case BLEND_INVSOURCE: 877 blendFactor.x = Short4(0xFFFFu) - current.x; 878 blendFactor.y = Short4(0xFFFFu) - current.y; 879 blendFactor.z = Short4(0xFFFFu) - current.z; 880 break; 881 case BLEND_DEST: 882 blendFactor.x = pixel.x; 883 blendFactor.y = pixel.y; 884 blendFactor.z = pixel.z; 885 break; 886 case BLEND_INVDEST: 887 blendFactor.x = Short4(0xFFFFu) - pixel.x; 888 blendFactor.y = Short4(0xFFFFu) - pixel.y; 889 blendFactor.z = Short4(0xFFFFu) - pixel.z; 890 break; 891 case BLEND_SOURCEALPHA: 892 blendFactor.x = current.w; 893 blendFactor.y = current.w; 894 blendFactor.z = current.w; 895 break; 896 case BLEND_INVSOURCEALPHA: 897 blendFactor.x = Short4(0xFFFFu) - current.w; 898 blendFactor.y = Short4(0xFFFFu) - current.w; 899 blendFactor.z = Short4(0xFFFFu) - current.w; 900 break; 901 case BLEND_DESTALPHA: 902 blendFactor.x = pixel.w; 903 blendFactor.y = pixel.w; 904 blendFactor.z = pixel.w; 905 break; 906 case BLEND_INVDESTALPHA: 907 blendFactor.x = Short4(0xFFFFu) - pixel.w; 908 blendFactor.y = Short4(0xFFFFu) - pixel.w; 909 blendFactor.z = Short4(0xFFFFu) - pixel.w; 910 break; 911 case BLEND_SRCALPHASAT: 912 blendFactor.x = Short4(0xFFFFu) - pixel.w; 913 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w)); 914 blendFactor.y = blendFactor.x; 915 blendFactor.z = blendFactor.x; 916 break; 917 case BLEND_CONSTANT: 918 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0])); 919 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1])); 920 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2])); 921 break; 922 case BLEND_INVCONSTANT: 923 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0])); 924 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1])); 925 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2])); 926 break; 927 case BLEND_CONSTANTALPHA: 928 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 929 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 930 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 931 break; 932 case BLEND_INVCONSTANTALPHA: 933 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 934 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 935 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 936 break; 937 default: 938 ASSERT(false); 939 } 940 } 941 blendFactorAlpha(Vector4s & blendFactor,const Vector4s & current,const Vector4s & pixel,BlendFactor blendFactorAlphaActive)942 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorAlphaActive) 943 { 944 switch(blendFactorAlphaActive) 945 { 946 case BLEND_ZERO: 947 // Optimized 948 break; 949 case BLEND_ONE: 950 // Optimized 951 break; 952 case BLEND_SOURCE: 953 blendFactor.w = current.w; 954 break; 955 case BLEND_INVSOURCE: 956 blendFactor.w = Short4(0xFFFFu) - current.w; 957 break; 958 case BLEND_DEST: 959 blendFactor.w = pixel.w; 960 break; 961 case BLEND_INVDEST: 962 blendFactor.w = Short4(0xFFFFu) - pixel.w; 963 break; 964 case BLEND_SOURCEALPHA: 965 blendFactor.w = current.w; 966 break; 967 case BLEND_INVSOURCEALPHA: 968 blendFactor.w = Short4(0xFFFFu) - current.w; 969 break; 970 case BLEND_DESTALPHA: 971 blendFactor.w = pixel.w; 972 break; 973 case BLEND_INVDESTALPHA: 974 blendFactor.w = Short4(0xFFFFu) - pixel.w; 975 break; 976 case BLEND_SRCALPHASAT: 977 blendFactor.w = Short4(0xFFFFu); 978 break; 979 case BLEND_CONSTANT: 980 case BLEND_CONSTANTALPHA: 981 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 982 break; 983 case BLEND_INVCONSTANT: 984 case BLEND_INVCONSTANTALPHA: 985 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 986 break; 987 default: 988 ASSERT(false); 989 } 990 } 991 isSRGB(int index) const992 bool PixelRoutine::isSRGB(int index) const 993 { 994 return Surface::isSRGBformat(state.targetFormat[index]); 995 } 996 readPixel(int index,Pointer<Byte> & cBuffer,Int & x,Vector4s & pixel)997 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel) 998 { 999 Short4 c01; 1000 Short4 c23; 1001 Pointer<Byte> buffer; 1002 Pointer<Byte> buffer2; 1003 1004 switch(state.targetFormat[index]) 1005 { 1006 case FORMAT_R5G6B5: 1007 buffer = cBuffer + 2 * x; 1008 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1009 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); 1010 1011 pixel.x = c01 & Short4(0xF800u); 1012 pixel.y = (c01 & Short4(0x07E0u)) << 5; 1013 pixel.z = (c01 & Short4(0x001Fu)) << 11; 1014 pixel.w = Short4(0xFFFFu); 1015 break; 1016 case FORMAT_A8R8G8B8: 1017 buffer = cBuffer + 4 * x; 1018 c01 = *Pointer<Short4>(buffer); 1019 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1020 c23 = *Pointer<Short4>(buffer); 1021 pixel.z = c01; 1022 pixel.y = c01; 1023 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1024 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1025 pixel.x = pixel.z; 1026 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1027 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1028 pixel.y = pixel.z; 1029 pixel.w = pixel.x; 1030 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1031 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1032 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1033 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1034 break; 1035 case FORMAT_A8B8G8R8: 1036 case FORMAT_SRGB8_A8: 1037 buffer = cBuffer + 4 * x; 1038 c01 = *Pointer<Short4>(buffer); 1039 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1040 c23 = *Pointer<Short4>(buffer); 1041 pixel.z = c01; 1042 pixel.y = c01; 1043 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1044 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1045 pixel.x = pixel.z; 1046 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1047 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1048 pixel.y = pixel.z; 1049 pixel.w = pixel.x; 1050 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1051 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1052 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1053 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1054 break; 1055 case FORMAT_A8: 1056 buffer = cBuffer + 1 * x; 1057 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0); 1058 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1059 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1); 1060 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1061 pixel.x = Short4(0x0000); 1062 pixel.y = Short4(0x0000); 1063 pixel.z = Short4(0x0000); 1064 break; 1065 case FORMAT_R8: 1066 buffer = cBuffer + 1 * x; 1067 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0); 1068 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1069 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1); 1070 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1071 pixel.y = Short4(0x0000); 1072 pixel.z = Short4(0x0000); 1073 pixel.w = Short4(0xFFFFu); 1074 break; 1075 case FORMAT_X8R8G8B8: 1076 buffer = cBuffer + 4 * x; 1077 c01 = *Pointer<Short4>(buffer); 1078 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1079 c23 = *Pointer<Short4>(buffer); 1080 pixel.z = c01; 1081 pixel.y = c01; 1082 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1083 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1084 pixel.x = pixel.z; 1085 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1086 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1087 pixel.y = pixel.z; 1088 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1089 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1090 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1091 pixel.w = Short4(0xFFFFu); 1092 break; 1093 case FORMAT_G8R8: 1094 buffer = cBuffer + 2 * x; 1095 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0)); 1096 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1097 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1)); 1098 pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8); 1099 pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8); 1100 pixel.z = Short4(0x0000u); 1101 pixel.w = Short4(0xFFFFu); 1102 break; 1103 case FORMAT_X8B8G8R8: 1104 case FORMAT_SRGB8_X8: 1105 buffer = cBuffer + 4 * x; 1106 c01 = *Pointer<Short4>(buffer); 1107 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1108 c23 = *Pointer<Short4>(buffer); 1109 pixel.z = c01; 1110 pixel.y = c01; 1111 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1112 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1113 pixel.x = pixel.z; 1114 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1115 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1116 pixel.y = pixel.z; 1117 pixel.w = pixel.x; 1118 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1119 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1120 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1121 pixel.w = Short4(0xFFFFu); 1122 break; 1123 case FORMAT_A8G8R8B8Q: 1124 UNIMPLEMENTED(); 1125 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1126 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1127 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1128 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1129 break; 1130 case FORMAT_X8G8R8B8Q: 1131 UNIMPLEMENTED(); 1132 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1133 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1134 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1135 // pixel.w = Short4(0xFFFFu); 1136 break; 1137 case FORMAT_A16B16G16R16: 1138 buffer = cBuffer; 1139 pixel.x = *Pointer<Short4>(buffer + 8 * x); 1140 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8); 1141 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1142 pixel.z = *Pointer<Short4>(buffer + 8 * x); 1143 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8); 1144 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); 1145 break; 1146 case FORMAT_G16R16: 1147 buffer = cBuffer; 1148 pixel.x = *Pointer<Short4>(buffer + 4 * x); 1149 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1150 pixel.y = *Pointer<Short4>(buffer + 4 * x); 1151 pixel.z = pixel.x; 1152 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y)); 1153 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y)); 1154 pixel.y = pixel.z; 1155 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z)); 1156 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z)); 1157 pixel.z = Short4(0xFFFFu); 1158 pixel.w = Short4(0xFFFFu); 1159 break; 1160 default: 1161 ASSERT(false); 1162 } 1163 1164 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 1165 { 1166 sRGBtoLinear16_12_16(pixel); 1167 } 1168 } 1169 alphaBlend(int index,Pointer<Byte> & cBuffer,Vector4s & current,Int & x)1170 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x) 1171 { 1172 if(!state.alphaBlendActive) 1173 { 1174 return; 1175 } 1176 1177 Vector4s pixel; 1178 readPixel(index, cBuffer, x, pixel); 1179 1180 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor 1181 Vector4s sourceFactor; 1182 Vector4s destFactor; 1183 1184 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor); 1185 blendFactor(destFactor, current, pixel, state.destBlendFactor); 1186 1187 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) 1188 { 1189 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x)); 1190 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y)); 1191 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z)); 1192 } 1193 1194 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) 1195 { 1196 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x)); 1197 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y)); 1198 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z)); 1199 } 1200 1201 switch(state.blendOperation) 1202 { 1203 case BLENDOP_ADD: 1204 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1205 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1206 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1207 break; 1208 case BLENDOP_SUB: 1209 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1210 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1211 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1212 break; 1213 case BLENDOP_INVSUB: 1214 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x)); 1215 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y)); 1216 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z)); 1217 break; 1218 case BLENDOP_MIN: 1219 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1220 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1221 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1222 break; 1223 case BLENDOP_MAX: 1224 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1225 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1226 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1227 break; 1228 case BLENDOP_SOURCE: 1229 // No operation 1230 break; 1231 case BLENDOP_DEST: 1232 current.x = pixel.x; 1233 current.y = pixel.y; 1234 current.z = pixel.z; 1235 break; 1236 case BLENDOP_NULL: 1237 current.x = Short4(0x0000); 1238 current.y = Short4(0x0000); 1239 current.z = Short4(0x0000); 1240 break; 1241 default: 1242 ASSERT(false); 1243 } 1244 1245 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha); 1246 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha); 1247 1248 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) 1249 { 1250 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w)); 1251 } 1252 1253 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) 1254 { 1255 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w)); 1256 } 1257 1258 switch(state.blendOperationAlpha) 1259 { 1260 case BLENDOP_ADD: 1261 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1262 break; 1263 case BLENDOP_SUB: 1264 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1265 break; 1266 case BLENDOP_INVSUB: 1267 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w)); 1268 break; 1269 case BLENDOP_MIN: 1270 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1271 break; 1272 case BLENDOP_MAX: 1273 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1274 break; 1275 case BLENDOP_SOURCE: 1276 // No operation 1277 break; 1278 case BLENDOP_DEST: 1279 current.w = pixel.w; 1280 break; 1281 case BLENDOP_NULL: 1282 current.w = Short4(0x0000); 1283 break; 1284 default: 1285 ASSERT(false); 1286 } 1287 } 1288 logicOperation(int index,Pointer<Byte> & cBuffer,Vector4s & current,Int & x)1289 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x) 1290 { 1291 if(state.logicalOperation == LOGICALOP_COPY) 1292 { 1293 return; 1294 } 1295 1296 Vector4s pixel; 1297 readPixel(index, cBuffer, x, pixel); 1298 1299 switch(state.logicalOperation) 1300 { 1301 case LOGICALOP_CLEAR: 1302 current.x = UShort4(0); 1303 current.y = UShort4(0); 1304 current.z = UShort4(0); 1305 break; 1306 case LOGICALOP_SET: 1307 current.x = UShort4(0xFFFFu); 1308 current.y = UShort4(0xFFFFu); 1309 current.z = UShort4(0xFFFFu); 1310 break; 1311 case LOGICALOP_COPY: 1312 ASSERT(false); // Optimized out 1313 break; 1314 case LOGICALOP_COPY_INVERTED: 1315 current.x = ~current.x; 1316 current.y = ~current.y; 1317 current.z = ~current.z; 1318 break; 1319 case LOGICALOP_NOOP: 1320 current.x = pixel.x; 1321 current.y = pixel.y; 1322 current.z = pixel.z; 1323 break; 1324 case LOGICALOP_INVERT: 1325 current.x = ~pixel.x; 1326 current.y = ~pixel.y; 1327 current.z = ~pixel.z; 1328 break; 1329 case LOGICALOP_AND: 1330 current.x = pixel.x & current.x; 1331 current.y = pixel.y & current.y; 1332 current.z = pixel.z & current.z; 1333 break; 1334 case LOGICALOP_NAND: 1335 current.x = ~(pixel.x & current.x); 1336 current.y = ~(pixel.y & current.y); 1337 current.z = ~(pixel.z & current.z); 1338 break; 1339 case LOGICALOP_OR: 1340 current.x = pixel.x | current.x; 1341 current.y = pixel.y | current.y; 1342 current.z = pixel.z | current.z; 1343 break; 1344 case LOGICALOP_NOR: 1345 current.x = ~(pixel.x | current.x); 1346 current.y = ~(pixel.y | current.y); 1347 current.z = ~(pixel.z | current.z); 1348 break; 1349 case LOGICALOP_XOR: 1350 current.x = pixel.x ^ current.x; 1351 current.y = pixel.y ^ current.y; 1352 current.z = pixel.z ^ current.z; 1353 break; 1354 case LOGICALOP_EQUIV: 1355 current.x = ~(pixel.x ^ current.x); 1356 current.y = ~(pixel.y ^ current.y); 1357 current.z = ~(pixel.z ^ current.z); 1358 break; 1359 case LOGICALOP_AND_REVERSE: 1360 current.x = ~pixel.x & current.x; 1361 current.y = ~pixel.y & current.y; 1362 current.z = ~pixel.z & current.z; 1363 break; 1364 case LOGICALOP_AND_INVERTED: 1365 current.x = pixel.x & ~current.x; 1366 current.y = pixel.y & ~current.y; 1367 current.z = pixel.z & ~current.z; 1368 break; 1369 case LOGICALOP_OR_REVERSE: 1370 current.x = ~pixel.x | current.x; 1371 current.y = ~pixel.y | current.y; 1372 current.z = ~pixel.z | current.z; 1373 break; 1374 case LOGICALOP_OR_INVERTED: 1375 current.x = pixel.x | ~current.x; 1376 current.y = pixel.y | ~current.y; 1377 current.z = pixel.z | ~current.z; 1378 break; 1379 default: 1380 ASSERT(false); 1381 } 1382 } 1383 writeColor(int index,Pointer<Byte> & cBuffer,Int & x,Vector4s & current,Int & sMask,Int & zMask,Int & cMask)1384 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s ¤t, Int &sMask, Int &zMask, Int &cMask) 1385 { 1386 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 1387 { 1388 linearToSRGB16_12_16(current); 1389 } 1390 1391 if(exactColorRounding) 1392 { 1393 switch(state.targetFormat[index]) 1394 { 1395 case FORMAT_R5G6B5: 1396 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400)); 1397 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200)); 1398 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400)); 1399 break; 1400 case FORMAT_X8G8R8B8Q: 1401 case FORMAT_A8G8R8B8Q: 1402 case FORMAT_X8R8G8B8: 1403 case FORMAT_X8B8G8R8: 1404 case FORMAT_A8R8G8B8: 1405 case FORMAT_A8B8G8R8: 1406 case FORMAT_SRGB8_X8: 1407 case FORMAT_SRGB8_A8: 1408 case FORMAT_G8R8: 1409 case FORMAT_R8: 1410 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080); 1411 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080); 1412 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080); 1413 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080); 1414 break; 1415 default: 1416 break; 1417 } 1418 } 1419 1420 int rgbaWriteMask = state.colorWriteActive(index); 1421 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2; 1422 1423 switch(state.targetFormat[index]) 1424 { 1425 case FORMAT_R5G6B5: 1426 { 1427 current.x = current.x & Short4(0xF800u); 1428 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5; 1429 current.z = As<UShort4>(current.z) >> 11; 1430 1431 current.x = current.x | current.y | current.z; 1432 } 1433 break; 1434 case FORMAT_X8G8R8B8Q: 1435 UNIMPLEMENTED(); 1436 // current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1437 // current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1438 // current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1439 1440 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1441 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); 1442 break; 1443 case FORMAT_A8G8R8B8Q: 1444 UNIMPLEMENTED(); 1445 // current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1446 // current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1447 // current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1448 // current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1449 1450 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1451 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); 1452 break; 1453 case FORMAT_X8R8G8B8: 1454 case FORMAT_A8R8G8B8: 1455 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7) 1456 { 1457 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1458 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1459 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1460 1461 current.z = As<Short4>(PackUnsigned(current.z, current.x)); 1462 current.y = As<Short4>(PackUnsigned(current.y, current.y)); 1463 1464 current.x = current.z; 1465 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1466 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1467 current.y = current.z; 1468 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1469 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1470 } 1471 else 1472 { 1473 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1474 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1475 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1476 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1477 1478 current.z = As<Short4>(PackUnsigned(current.z, current.x)); 1479 current.y = As<Short4>(PackUnsigned(current.y, current.w)); 1480 1481 current.x = current.z; 1482 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1483 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1484 current.y = current.z; 1485 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1486 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1487 } 1488 break; 1489 case FORMAT_X8B8G8R8: 1490 case FORMAT_A8B8G8R8: 1491 case FORMAT_SRGB8_X8: 1492 case FORMAT_SRGB8_A8: 1493 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7) 1494 { 1495 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1496 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1497 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1498 1499 current.z = As<Short4>(PackUnsigned(current.x, current.z)); 1500 current.y = As<Short4>(PackUnsigned(current.y, current.y)); 1501 1502 current.x = current.z; 1503 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1504 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1505 current.y = current.z; 1506 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1507 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1508 } 1509 else 1510 { 1511 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1512 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1513 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1514 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1515 1516 current.z = As<Short4>(PackUnsigned(current.x, current.z)); 1517 current.y = As<Short4>(PackUnsigned(current.y, current.w)); 1518 1519 current.x = current.z; 1520 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1521 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1522 current.y = current.z; 1523 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1524 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1525 } 1526 break; 1527 case FORMAT_G8R8: 1528 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1529 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1530 current.x = As<Short4>(PackUnsigned(current.x, current.x)); 1531 current.y = As<Short4>(PackUnsigned(current.y, current.y)); 1532 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y)); 1533 break; 1534 case FORMAT_R8: 1535 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1536 current.x = As<Short4>(PackUnsigned(current.x, current.x)); 1537 break; 1538 case FORMAT_A8: 1539 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1540 current.w = As<Short4>(PackUnsigned(current.w, current.w)); 1541 break; 1542 case FORMAT_G16R16: 1543 current.z = current.x; 1544 current.x = As<Short4>(UnpackLow(current.x, current.y)); 1545 current.z = As<Short4>(UnpackHigh(current.z, current.y)); 1546 current.y = current.z; 1547 break; 1548 case FORMAT_A16B16G16R16: 1549 transpose4x4(current.x, current.y, current.z, current.w); 1550 break; 1551 default: 1552 ASSERT(false); 1553 } 1554 1555 Short4 c01 = current.z; 1556 Short4 c23 = current.y; 1557 1558 Int xMask; // Combination of all masks 1559 1560 if(state.depthTestActive) 1561 { 1562 xMask = zMask; 1563 } 1564 else 1565 { 1566 xMask = cMask; 1567 } 1568 1569 if(state.stencilActive) 1570 { 1571 xMask &= sMask; 1572 } 1573 1574 switch(state.targetFormat[index]) 1575 { 1576 case FORMAT_R5G6B5: 1577 { 1578 Pointer<Byte> buffer = cBuffer + 2 * x; 1579 Int value = *Pointer<Int>(buffer); 1580 1581 Int c01 = Extract(As<Int2>(current.x), 0); 1582 1583 if((bgraWriteMask & 0x00000007) != 0x00000007) 1584 { 1585 Int masked = value; 1586 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); 1587 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); 1588 c01 |= masked; 1589 } 1590 1591 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8); 1592 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8); 1593 c01 |= value; 1594 *Pointer<Int>(buffer) = c01; 1595 1596 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1597 value = *Pointer<Int>(buffer); 1598 1599 Int c23 = Extract(As<Int2>(current.x), 1); 1600 1601 if((bgraWriteMask & 0x00000007) != 0x00000007) 1602 { 1603 Int masked = value; 1604 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); 1605 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); 1606 c23 |= masked; 1607 } 1608 1609 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8); 1610 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8); 1611 c23 |= value; 1612 *Pointer<Int>(buffer) = c23; 1613 } 1614 break; 1615 case FORMAT_A8G8R8B8Q: 1616 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha? 1617 UNIMPLEMENTED(); 1618 // value = *Pointer<Short4>(cBuffer + 8 * x + 0); 1619 1620 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) || 1621 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) && 1622 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1623 // { 1624 // Short4 masked = value; 1625 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1626 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1627 // c01 |= masked; 1628 // } 1629 1630 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1631 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1632 // c01 |= value; 1633 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01; 1634 1635 // value = *Pointer<Short4>(cBuffer + 8 * x + 8); 1636 1637 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) || 1638 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) && 1639 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1640 // { 1641 // Short4 masked = value; 1642 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1643 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1644 // c23 |= masked; 1645 // } 1646 1647 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1648 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1649 // c23 |= value; 1650 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23; 1651 break; 1652 case FORMAT_A8R8G8B8: 1653 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha? 1654 { 1655 Pointer<Byte> buffer = cBuffer + x * 4; 1656 Short4 value = *Pointer<Short4>(buffer); 1657 1658 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) || 1659 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) && 1660 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1661 { 1662 Short4 masked = value; 1663 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1664 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1665 c01 |= masked; 1666 } 1667 1668 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1669 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1670 c01 |= value; 1671 *Pointer<Short4>(buffer) = c01; 1672 1673 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1674 value = *Pointer<Short4>(buffer); 1675 1676 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) || 1677 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) && 1678 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1679 { 1680 Short4 masked = value; 1681 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1682 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1683 c23 |= masked; 1684 } 1685 1686 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1687 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1688 c23 |= value; 1689 *Pointer<Short4>(buffer) = c23; 1690 } 1691 break; 1692 case FORMAT_A8B8G8R8: 1693 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha? 1694 case FORMAT_SRGB8_X8: 1695 case FORMAT_SRGB8_A8: 1696 { 1697 Pointer<Byte> buffer = cBuffer + x * 4; 1698 Short4 value = *Pointer<Short4>(buffer); 1699 1700 bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) || 1701 (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) && 1702 ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh? 1703 1704 if(masked) 1705 { 1706 Short4 masked = value; 1707 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); 1708 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); 1709 c01 |= masked; 1710 } 1711 1712 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1713 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1714 c01 |= value; 1715 *Pointer<Short4>(buffer) = c01; 1716 1717 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1718 value = *Pointer<Short4>(buffer); 1719 1720 if(masked) 1721 { 1722 Short4 masked = value; 1723 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); 1724 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); 1725 c23 |= masked; 1726 } 1727 1728 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1729 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1730 c23 |= value; 1731 *Pointer<Short4>(buffer) = c23; 1732 } 1733 break; 1734 case FORMAT_G8R8: 1735 if((rgbaWriteMask & 0x00000003) != 0x0) 1736 { 1737 Pointer<Byte> buffer = cBuffer + 2 * x; 1738 Int2 value; 1739 value = Insert(value, *Pointer<Int>(buffer), 0); 1740 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1741 value = Insert(value, *Pointer<Int>(buffer + pitch), 1); 1742 1743 Int2 packedCol = As<Int2>(current.x); 1744 1745 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); 1746 if((rgbaWriteMask & 0x3) != 0x3) 1747 { 1748 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); 1749 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 1750 mergedMask &= rgbaMask; 1751 } 1752 1753 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask)); 1754 1755 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); 1756 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1)); 1757 } 1758 break; 1759 case FORMAT_R8: 1760 if(rgbaWriteMask & 0x00000001) 1761 { 1762 Pointer<Byte> buffer = cBuffer + 1 * x; 1763 Short4 value; 1764 value = Insert(value, *Pointer<Short>(buffer), 0); 1765 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1766 value = Insert(value, *Pointer<Short>(buffer + pitch), 1); 1767 1768 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask); 1769 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask); 1770 current.x |= value; 1771 1772 *Pointer<Short>(buffer) = Extract(current.x, 0); 1773 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1); 1774 } 1775 break; 1776 case FORMAT_A8: 1777 if(rgbaWriteMask & 0x00000008) 1778 { 1779 Pointer<Byte> buffer = cBuffer + 1 * x; 1780 Short4 value; 1781 value = Insert(value, *Pointer<Short>(buffer), 0); 1782 Int pitch = *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1783 value = Insert(value, *Pointer<Short>(buffer + pitch), 1); 1784 1785 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q) + 8 * xMask); 1786 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask); 1787 current.w |= value; 1788 1789 *Pointer<Short>(buffer) = Extract(current.w, 0); 1790 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1); 1791 } 1792 break; 1793 case FORMAT_G16R16: 1794 { 1795 Pointer<Byte> buffer = cBuffer + 4 * x; 1796 1797 Short4 value = *Pointer<Short4>(buffer); 1798 1799 if((rgbaWriteMask & 0x00000003) != 0x00000003) 1800 { 1801 Short4 masked = value; 1802 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); 1803 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); 1804 current.x |= masked; 1805 } 1806 1807 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1808 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1809 current.x |= value; 1810 *Pointer<Short4>(buffer) = current.x; 1811 1812 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1813 1814 value = *Pointer<Short4>(buffer); 1815 1816 if((rgbaWriteMask & 0x00000003) != 0x00000003) 1817 { 1818 Short4 masked = value; 1819 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); 1820 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); 1821 current.y |= masked; 1822 } 1823 1824 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1825 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1826 current.y |= value; 1827 *Pointer<Short4>(buffer) = current.y; 1828 } 1829 break; 1830 case FORMAT_A16B16G16R16: 1831 { 1832 Pointer<Byte> buffer = cBuffer + 8 * x; 1833 1834 { 1835 Short4 value = *Pointer<Short4>(buffer); 1836 1837 if(rgbaWriteMask != 0x0000000F) 1838 { 1839 Short4 masked = value; 1840 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1841 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1842 current.x |= masked; 1843 } 1844 1845 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8); 1846 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8); 1847 current.x |= value; 1848 *Pointer<Short4>(buffer) = current.x; 1849 } 1850 1851 { 1852 Short4 value = *Pointer<Short4>(buffer + 8); 1853 1854 if(rgbaWriteMask != 0x0000000F) 1855 { 1856 Short4 masked = value; 1857 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1858 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1859 current.y |= masked; 1860 } 1861 1862 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8); 1863 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8); 1864 current.y |= value; 1865 *Pointer<Short4>(buffer + 8) = current.y; 1866 } 1867 1868 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1869 1870 { 1871 Short4 value = *Pointer<Short4>(buffer); 1872 1873 if(rgbaWriteMask != 0x0000000F) 1874 { 1875 Short4 masked = value; 1876 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1877 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1878 current.z |= masked; 1879 } 1880 1881 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8); 1882 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8); 1883 current.z |= value; 1884 *Pointer<Short4>(buffer) = current.z; 1885 } 1886 1887 { 1888 Short4 value = *Pointer<Short4>(buffer + 8); 1889 1890 if(rgbaWriteMask != 0x0000000F) 1891 { 1892 Short4 masked = value; 1893 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1894 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1895 current.w |= masked; 1896 } 1897 1898 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8); 1899 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8); 1900 current.w |= value; 1901 *Pointer<Short4>(buffer + 8) = current.w; 1902 } 1903 } 1904 break; 1905 default: 1906 ASSERT(false); 1907 } 1908 } 1909 blendFactor(Vector4f & blendFactor,const Vector4f & oC,const Vector4f & pixel,BlendFactor blendFactorActive)1910 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive) 1911 { 1912 switch(blendFactorActive) 1913 { 1914 case BLEND_ZERO: 1915 // Optimized 1916 break; 1917 case BLEND_ONE: 1918 // Optimized 1919 break; 1920 case BLEND_SOURCE: 1921 blendFactor.x = oC.x; 1922 blendFactor.y = oC.y; 1923 blendFactor.z = oC.z; 1924 break; 1925 case BLEND_INVSOURCE: 1926 blendFactor.x = Float4(1.0f) - oC.x; 1927 blendFactor.y = Float4(1.0f) - oC.y; 1928 blendFactor.z = Float4(1.0f) - oC.z; 1929 break; 1930 case BLEND_DEST: 1931 blendFactor.x = pixel.x; 1932 blendFactor.y = pixel.y; 1933 blendFactor.z = pixel.z; 1934 break; 1935 case BLEND_INVDEST: 1936 blendFactor.x = Float4(1.0f) - pixel.x; 1937 blendFactor.y = Float4(1.0f) - pixel.y; 1938 blendFactor.z = Float4(1.0f) - pixel.z; 1939 break; 1940 case BLEND_SOURCEALPHA: 1941 blendFactor.x = oC.w; 1942 blendFactor.y = oC.w; 1943 blendFactor.z = oC.w; 1944 break; 1945 case BLEND_INVSOURCEALPHA: 1946 blendFactor.x = Float4(1.0f) - oC.w; 1947 blendFactor.y = Float4(1.0f) - oC.w; 1948 blendFactor.z = Float4(1.0f) - oC.w; 1949 break; 1950 case BLEND_DESTALPHA: 1951 blendFactor.x = pixel.w; 1952 blendFactor.y = pixel.w; 1953 blendFactor.z = pixel.w; 1954 break; 1955 case BLEND_INVDESTALPHA: 1956 blendFactor.x = Float4(1.0f) - pixel.w; 1957 blendFactor.y = Float4(1.0f) - pixel.w; 1958 blendFactor.z = Float4(1.0f) - pixel.w; 1959 break; 1960 case BLEND_SRCALPHASAT: 1961 blendFactor.x = Float4(1.0f) - pixel.w; 1962 blendFactor.x = Min(blendFactor.x, oC.w); 1963 blendFactor.y = blendFactor.x; 1964 blendFactor.z = blendFactor.x; 1965 break; 1966 case BLEND_CONSTANT: 1967 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0])); 1968 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1])); 1969 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2])); 1970 break; 1971 case BLEND_INVCONSTANT: 1972 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0])); 1973 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1])); 1974 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2])); 1975 break; 1976 default: 1977 ASSERT(false); 1978 } 1979 } 1980 blendFactorAlpha(Vector4f & blendFactor,const Vector4f & oC,const Vector4f & pixel,BlendFactor blendFactorAlphaActive)1981 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive) 1982 { 1983 switch(blendFactorAlphaActive) 1984 { 1985 case BLEND_ZERO: 1986 // Optimized 1987 break; 1988 case BLEND_ONE: 1989 // Optimized 1990 break; 1991 case BLEND_SOURCE: 1992 blendFactor.w = oC.w; 1993 break; 1994 case BLEND_INVSOURCE: 1995 blendFactor.w = Float4(1.0f) - oC.w; 1996 break; 1997 case BLEND_DEST: 1998 blendFactor.w = pixel.w; 1999 break; 2000 case BLEND_INVDEST: 2001 blendFactor.w = Float4(1.0f) - pixel.w; 2002 break; 2003 case BLEND_SOURCEALPHA: 2004 blendFactor.w = oC.w; 2005 break; 2006 case BLEND_INVSOURCEALPHA: 2007 blendFactor.w = Float4(1.0f) - oC.w; 2008 break; 2009 case BLEND_DESTALPHA: 2010 blendFactor.w = pixel.w; 2011 break; 2012 case BLEND_INVDESTALPHA: 2013 blendFactor.w = Float4(1.0f) - pixel.w; 2014 break; 2015 case BLEND_SRCALPHASAT: 2016 blendFactor.w = Float4(1.0f); 2017 break; 2018 case BLEND_CONSTANT: 2019 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3])); 2020 break; 2021 case BLEND_INVCONSTANT: 2022 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3])); 2023 break; 2024 default: 2025 ASSERT(false); 2026 } 2027 } 2028 alphaBlend(int index,Pointer<Byte> & cBuffer,Vector4f & oC,Int & x)2029 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x) 2030 { 2031 if(!state.alphaBlendActive) 2032 { 2033 return; 2034 } 2035 2036 Pointer<Byte> buffer; 2037 Vector4f pixel; 2038 2039 Vector4s color; 2040 Short4 c01; 2041 Short4 c23; 2042 2043 Float4 one; 2044 if(Surface::isFloatFormat(state.targetFormat[index])) 2045 { 2046 one = Float4(1.0f); 2047 } 2048 else if(Surface::isNonNormalizedInteger(state.targetFormat[index])) 2049 { 2050 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF)); 2051 } 2052 2053 switch(state.targetFormat[index]) 2054 { 2055 case FORMAT_R32I: 2056 case FORMAT_R32UI: 2057 case FORMAT_R32F: 2058 buffer = cBuffer; 2059 // FIXME: movlps 2060 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0); 2061 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4); 2062 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2063 // FIXME: movhps 2064 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0); 2065 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4); 2066 pixel.y = pixel.z = pixel.w = one; 2067 break; 2068 case FORMAT_G32R32I: 2069 case FORMAT_G32R32UI: 2070 case FORMAT_G32R32F: 2071 buffer = cBuffer; 2072 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16); 2073 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2074 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16); 2075 pixel.z = pixel.x; 2076 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88); 2077 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD); 2078 pixel.y = pixel.z; 2079 pixel.z = pixel.w = one; 2080 break; 2081 case FORMAT_X32B32G32R32F: 2082 case FORMAT_A32B32G32R32F: 2083 case FORMAT_X32B32G32R32F_UNSIGNED: 2084 case FORMAT_A32B32G32R32I: 2085 case FORMAT_A32B32G32R32UI: 2086 buffer = cBuffer; 2087 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16); 2088 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16); 2089 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2090 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16); 2091 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16); 2092 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); 2093 if(state.targetFormat[index] == FORMAT_X32B32G32R32F || 2094 state.targetFormat[index] == FORMAT_X32B32G32R32F_UNSIGNED) 2095 { 2096 pixel.w = Float4(1.0f); 2097 } 2098 break; 2099 default: 2100 ASSERT(false); 2101 } 2102 2103 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 2104 { 2105 sRGBtoLinear(pixel.x); 2106 sRGBtoLinear(pixel.y); 2107 sRGBtoLinear(pixel.z); 2108 } 2109 2110 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor 2111 Vector4f sourceFactor; 2112 Vector4f destFactor; 2113 2114 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor); 2115 blendFactor(destFactor, oC, pixel, state.destBlendFactor); 2116 2117 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) 2118 { 2119 oC.x *= sourceFactor.x; 2120 oC.y *= sourceFactor.y; 2121 oC.z *= sourceFactor.z; 2122 } 2123 2124 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) 2125 { 2126 pixel.x *= destFactor.x; 2127 pixel.y *= destFactor.y; 2128 pixel.z *= destFactor.z; 2129 } 2130 2131 switch(state.blendOperation) 2132 { 2133 case BLENDOP_ADD: 2134 oC.x += pixel.x; 2135 oC.y += pixel.y; 2136 oC.z += pixel.z; 2137 break; 2138 case BLENDOP_SUB: 2139 oC.x -= pixel.x; 2140 oC.y -= pixel.y; 2141 oC.z -= pixel.z; 2142 break; 2143 case BLENDOP_INVSUB: 2144 oC.x = pixel.x - oC.x; 2145 oC.y = pixel.y - oC.y; 2146 oC.z = pixel.z - oC.z; 2147 break; 2148 case BLENDOP_MIN: 2149 oC.x = Min(oC.x, pixel.x); 2150 oC.y = Min(oC.y, pixel.y); 2151 oC.z = Min(oC.z, pixel.z); 2152 break; 2153 case BLENDOP_MAX: 2154 oC.x = Max(oC.x, pixel.x); 2155 oC.y = Max(oC.y, pixel.y); 2156 oC.z = Max(oC.z, pixel.z); 2157 break; 2158 case BLENDOP_SOURCE: 2159 // No operation 2160 break; 2161 case BLENDOP_DEST: 2162 oC.x = pixel.x; 2163 oC.y = pixel.y; 2164 oC.z = pixel.z; 2165 break; 2166 case BLENDOP_NULL: 2167 oC.x = Float4(0.0f); 2168 oC.y = Float4(0.0f); 2169 oC.z = Float4(0.0f); 2170 break; 2171 default: 2172 ASSERT(false); 2173 } 2174 2175 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha); 2176 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha); 2177 2178 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) 2179 { 2180 oC.w *= sourceFactor.w; 2181 } 2182 2183 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) 2184 { 2185 pixel.w *= destFactor.w; 2186 } 2187 2188 switch(state.blendOperationAlpha) 2189 { 2190 case BLENDOP_ADD: 2191 oC.w += pixel.w; 2192 break; 2193 case BLENDOP_SUB: 2194 oC.w -= pixel.w; 2195 break; 2196 case BLENDOP_INVSUB: 2197 pixel.w -= oC.w; 2198 oC.w = pixel.w; 2199 break; 2200 case BLENDOP_MIN: 2201 oC.w = Min(oC.w, pixel.w); 2202 break; 2203 case BLENDOP_MAX: 2204 oC.w = Max(oC.w, pixel.w); 2205 break; 2206 case BLENDOP_SOURCE: 2207 // No operation 2208 break; 2209 case BLENDOP_DEST: 2210 oC.w = pixel.w; 2211 break; 2212 case BLENDOP_NULL: 2213 oC.w = Float4(0.0f); 2214 break; 2215 default: 2216 ASSERT(false); 2217 } 2218 } 2219 writeColor(int index,Pointer<Byte> & cBuffer,Int & x,Vector4f & oC,Int & sMask,Int & zMask,Int & cMask)2220 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask) 2221 { 2222 switch(state.targetFormat[index]) 2223 { 2224 case FORMAT_R32F: 2225 case FORMAT_R32I: 2226 case FORMAT_R32UI: 2227 case FORMAT_R16I: 2228 case FORMAT_R16UI: 2229 case FORMAT_R8I: 2230 case FORMAT_R8UI: 2231 break; 2232 case FORMAT_G32R32F: 2233 case FORMAT_G32R32I: 2234 case FORMAT_G32R32UI: 2235 case FORMAT_G16R16I: 2236 case FORMAT_G16R16UI: 2237 case FORMAT_G8R8I: 2238 case FORMAT_G8R8UI: 2239 oC.z = oC.x; 2240 oC.x = UnpackLow(oC.x, oC.y); 2241 oC.z = UnpackHigh(oC.z, oC.y); 2242 oC.y = oC.z; 2243 break; 2244 case FORMAT_X32B32G32R32F: 2245 case FORMAT_A32B32G32R32F: 2246 case FORMAT_X32B32G32R32F_UNSIGNED: 2247 case FORMAT_A32B32G32R32I: 2248 case FORMAT_A32B32G32R32UI: 2249 case FORMAT_A16B16G16R16I: 2250 case FORMAT_A16B16G16R16UI: 2251 case FORMAT_A8B8G8R8I: 2252 case FORMAT_A8B8G8R8UI: 2253 transpose4x4(oC.x, oC.y, oC.z, oC.w); 2254 break; 2255 default: 2256 ASSERT(false); 2257 } 2258 2259 int rgbaWriteMask = state.colorWriteActive(index); 2260 2261 Int xMask; // Combination of all masks 2262 2263 if(state.depthTestActive) 2264 { 2265 xMask = zMask; 2266 } 2267 else 2268 { 2269 xMask = cMask; 2270 } 2271 2272 if(state.stencilActive) 2273 { 2274 xMask &= sMask; 2275 } 2276 2277 Pointer<Byte> buffer; 2278 Float4 value; 2279 2280 switch(state.targetFormat[index]) 2281 { 2282 case FORMAT_R32F: 2283 case FORMAT_R32I: 2284 case FORMAT_R32UI: 2285 if(rgbaWriteMask & 0x00000001) 2286 { 2287 buffer = cBuffer + 4 * x; 2288 2289 // FIXME: movlps 2290 value.x = *Pointer<Float>(buffer + 0); 2291 value.y = *Pointer<Float>(buffer + 4); 2292 2293 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2294 2295 // FIXME: movhps 2296 value.z = *Pointer<Float>(buffer + 0); 2297 value.w = *Pointer<Float>(buffer + 4); 2298 2299 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16)); 2300 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16)); 2301 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2302 2303 // FIXME: movhps 2304 *Pointer<Float>(buffer + 0) = oC.x.z; 2305 *Pointer<Float>(buffer + 4) = oC.x.w; 2306 2307 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2308 2309 // FIXME: movlps 2310 *Pointer<Float>(buffer + 0) = oC.x.x; 2311 *Pointer<Float>(buffer + 4) = oC.x.y; 2312 } 2313 break; 2314 case FORMAT_R16I: 2315 case FORMAT_R16UI: 2316 if(rgbaWriteMask & 0x00000001) 2317 { 2318 buffer = cBuffer + 2 * x; 2319 2320 UShort4 xyzw; 2321 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0)); 2322 2323 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2324 2325 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1)); 2326 value = As<Float4>(Int4(xyzw)); 2327 2328 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); 2329 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); 2330 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2331 2332 if(state.targetFormat[index] == FORMAT_R16I) 2333 { 2334 Float component = oC.x.z; 2335 *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); 2336 component = oC.x.w; 2337 *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); 2338 2339 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2340 2341 component = oC.x.x; 2342 *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); 2343 component = oC.x.y; 2344 *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); 2345 } 2346 else // FORMAT_R16UI 2347 { 2348 Float component = oC.x.z; 2349 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); 2350 component = oC.x.w; 2351 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); 2352 2353 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2354 2355 component = oC.x.x; 2356 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); 2357 component = oC.x.y; 2358 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); 2359 } 2360 } 2361 break; 2362 case FORMAT_R8I: 2363 case FORMAT_R8UI: 2364 if(rgbaWriteMask & 0x00000001) 2365 { 2366 buffer = cBuffer + x; 2367 2368 UInt xyzw, packedCol; 2369 2370 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF; 2371 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2372 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16; 2373 2374 Short4 tmpCol = Short4(As<Int4>(oC.x)); 2375 if(state.targetFormat[index] == FORMAT_R8I) 2376 { 2377 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol)); 2378 } 2379 else 2380 { 2381 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol)); 2382 } 2383 packedCol = Extract(As<Int2>(tmpCol), 0); 2384 2385 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) | 2386 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask)); 2387 2388 *Pointer<UShort>(buffer) = UShort(packedCol >> 16); 2389 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2390 *Pointer<UShort>(buffer) = UShort(packedCol); 2391 } 2392 break; 2393 case FORMAT_G32R32F: 2394 case FORMAT_G32R32I: 2395 case FORMAT_G32R32UI: 2396 buffer = cBuffer + 8 * x; 2397 2398 value = *Pointer<Float4>(buffer); 2399 2400 if((rgbaWriteMask & 0x00000003) != 0x00000003) 2401 { 2402 Float4 masked = value; 2403 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); 2404 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); 2405 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); 2406 } 2407 2408 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16)); 2409 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16)); 2410 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2411 *Pointer<Float4>(buffer) = oC.x; 2412 2413 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2414 2415 value = *Pointer<Float4>(buffer); 2416 2417 if((rgbaWriteMask & 0x00000003) != 0x00000003) 2418 { 2419 Float4 masked; 2420 2421 masked = value; 2422 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); 2423 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); 2424 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); 2425 } 2426 2427 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16)); 2428 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16)); 2429 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); 2430 *Pointer<Float4>(buffer) = oC.y; 2431 break; 2432 case FORMAT_G16R16I: 2433 case FORMAT_G16R16UI: 2434 if((rgbaWriteMask & 0x00000003) != 0x0) 2435 { 2436 buffer = cBuffer + 4 * x; 2437 2438 UInt2 rgbaMask; 2439 UShort4 packedCol = UShort4(As<Int4>(oC.x)); 2440 UShort4 value = *Pointer<UShort4>(buffer); 2441 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); 2442 if((rgbaWriteMask & 0x3) != 0x3) 2443 { 2444 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0])); 2445 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 2446 mergedMask &= rgbaMask; 2447 } 2448 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); 2449 2450 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2451 2452 packedCol = UShort4(As<Int4>(oC.y)); 2453 value = *Pointer<UShort4>(buffer); 2454 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); 2455 if((rgbaWriteMask & 0x3) != 0x3) 2456 { 2457 mergedMask &= rgbaMask; 2458 } 2459 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); 2460 } 2461 break; 2462 case FORMAT_G8R8I: 2463 case FORMAT_G8R8UI: 2464 if((rgbaWriteMask & 0x00000003) != 0x0) 2465 { 2466 buffer = cBuffer + 2 * x; 2467 2468 Int2 xyzw, packedCol; 2469 2470 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0); 2471 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2472 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1); 2473 2474 if(state.targetFormat[index] == FORMAT_G8R8I) 2475 { 2476 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2477 } 2478 else 2479 { 2480 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2481 } 2482 2483 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); 2484 if((rgbaWriteMask & 0x3) != 0x3) 2485 { 2486 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); 2487 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 2488 mergedMask &= rgbaMask; 2489 } 2490 2491 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask)); 2492 2493 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1)); 2494 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2495 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); 2496 } 2497 break; 2498 case FORMAT_X32B32G32R32F: 2499 case FORMAT_A32B32G32R32F: 2500 case FORMAT_X32B32G32R32F_UNSIGNED: 2501 case FORMAT_A32B32G32R32I: 2502 case FORMAT_A32B32G32R32UI: 2503 buffer = cBuffer + 16 * x; 2504 2505 { 2506 value = *Pointer<Float4>(buffer, 16); 2507 2508 if(rgbaWriteMask != 0x0000000F) 2509 { 2510 Float4 masked = value; 2511 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2512 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2513 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); 2514 } 2515 2516 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); 2517 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16)); 2518 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2519 *Pointer<Float4>(buffer, 16) = oC.x; 2520 } 2521 2522 { 2523 value = *Pointer<Float4>(buffer + 16, 16); 2524 2525 if(rgbaWriteMask != 0x0000000F) 2526 { 2527 Float4 masked = value; 2528 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2529 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2530 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); 2531 } 2532 2533 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16)); 2534 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16)); 2535 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); 2536 *Pointer<Float4>(buffer + 16, 16) = oC.y; 2537 } 2538 2539 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2540 2541 { 2542 value = *Pointer<Float4>(buffer, 16); 2543 2544 if(rgbaWriteMask != 0x0000000F) 2545 { 2546 Float4 masked = value; 2547 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2548 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2549 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked)); 2550 } 2551 2552 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16)); 2553 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16)); 2554 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value)); 2555 *Pointer<Float4>(buffer, 16) = oC.z; 2556 } 2557 2558 { 2559 value = *Pointer<Float4>(buffer + 16, 16); 2560 2561 if(rgbaWriteMask != 0x0000000F) 2562 { 2563 Float4 masked = value; 2564 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2565 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2566 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked)); 2567 } 2568 2569 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16)); 2570 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16)); 2571 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value)); 2572 *Pointer<Float4>(buffer + 16, 16) = oC.w; 2573 } 2574 break; 2575 case FORMAT_A16B16G16R16I: 2576 case FORMAT_A16B16G16R16UI: 2577 if((rgbaWriteMask & 0x0000000F) != 0x0) 2578 { 2579 buffer = cBuffer + 8 * x; 2580 2581 UInt4 rgbaMask; 2582 UShort8 value = *Pointer<UShort8>(buffer); 2583 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y))); 2584 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16); 2585 if((rgbaWriteMask & 0xF) != 0xF) 2586 { 2587 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); 2588 rgbaMask = UInt4(tmpMask, tmpMask); 2589 mergedMask &= rgbaMask; 2590 } 2591 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); 2592 2593 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2594 2595 value = *Pointer<UShort8>(buffer); 2596 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w))); 2597 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16); 2598 if((rgbaWriteMask & 0xF) != 0xF) 2599 { 2600 mergedMask &= rgbaMask; 2601 } 2602 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); 2603 } 2604 break; 2605 case FORMAT_A8B8G8R8I: 2606 case FORMAT_A8B8G8R8UI: 2607 if((rgbaWriteMask & 0x0000000F) != 0x0) 2608 { 2609 UInt2 value, packedCol, mergedMask; 2610 2611 buffer = cBuffer + 4 * x; 2612 2613 if(state.targetFormat[index] == FORMAT_A8B8G8R8I) 2614 { 2615 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2616 } 2617 else 2618 { 2619 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2620 } 2621 value = *Pointer<UInt2>(buffer, 16); 2622 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); 2623 if(rgbaWriteMask != 0xF) 2624 { 2625 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); 2626 } 2627 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); 2628 2629 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2630 2631 if(state.targetFormat[index] == FORMAT_A8B8G8R8I) 2632 { 2633 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); 2634 } 2635 else 2636 { 2637 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); 2638 } 2639 value = *Pointer<UInt2>(buffer, 16); 2640 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); 2641 if(rgbaWriteMask != 0xF) 2642 { 2643 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); 2644 } 2645 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); 2646 } 2647 break; 2648 default: 2649 ASSERT(false); 2650 } 2651 } 2652 convertFixed16(Float4 & cf,bool saturate)2653 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate) 2654 { 2655 return UShort4(cf * Float4(0xFFFF), saturate); 2656 } 2657 sRGBtoLinear16_12_16(Vector4s & c)2658 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c) 2659 { 2660 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16); 2661 2662 c.x = As<UShort4>(c.x) >> 4; 2663 c.y = As<UShort4>(c.y) >> 4; 2664 c.z = As<UShort4>(c.z) >> 4; 2665 2666 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); 2667 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); 2668 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); 2669 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); 2670 2671 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); 2672 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); 2673 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); 2674 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); 2675 2676 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); 2677 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); 2678 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); 2679 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); 2680 } 2681 linearToSRGB16_12_16(Vector4s & c)2682 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c) 2683 { 2684 c.x = As<UShort4>(c.x) >> 4; 2685 c.y = As<UShort4>(c.y) >> 4; 2686 c.z = As<UShort4>(c.z) >> 4; 2687 2688 linearToSRGB12_16(c); 2689 } 2690 linearToSRGB12_16(Vector4s & c)2691 void PixelRoutine::linearToSRGB12_16(Vector4s &c) 2692 { 2693 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16); 2694 2695 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); 2696 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); 2697 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); 2698 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); 2699 2700 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); 2701 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); 2702 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); 2703 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); 2704 2705 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); 2706 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); 2707 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); 2708 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); 2709 } 2710 sRGBtoLinear(const Float4 & x)2711 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2 2712 { 2713 Float4 linear = x * x; 2714 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f); 2715 2716 return Min(Max(linear, Float4(0.0f)), Float4(1.0f)); 2717 } 2718 colorUsed()2719 bool PixelRoutine::colorUsed() 2720 { 2721 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill; 2722 } 2723 } 2724