1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelPipeline.hpp" 16 #include "Renderer.hpp" 17 #include "SamplerCore.hpp" 18 19 namespace sw 20 { 21 extern bool postBlendSRGB; 22 setBuiltins(Int & x,Int & y,Float4 (& z)[4],Float4 & w)23 void PixelPipeline::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) 24 { 25 if(state.color[0].component & 0x1) diffuse.x = convertFixed12(v[0].x); else diffuse.x = Short4(0x1000); 26 if(state.color[0].component & 0x2) diffuse.y = convertFixed12(v[0].y); else diffuse.y = Short4(0x1000); 27 if(state.color[0].component & 0x4) diffuse.z = convertFixed12(v[0].z); else diffuse.z = Short4(0x1000); 28 if(state.color[0].component & 0x8) diffuse.w = convertFixed12(v[0].w); else diffuse.w = Short4(0x1000); 29 30 if(state.color[1].component & 0x1) specular.x = convertFixed12(v[1].x); else specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000); 31 if(state.color[1].component & 0x2) specular.y = convertFixed12(v[1].y); else specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); 32 if(state.color[1].component & 0x4) specular.z = convertFixed12(v[1].z); else specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); 33 if(state.color[1].component & 0x8) specular.w = convertFixed12(v[1].w); else specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000); 34 } 35 fixedFunction()36 void PixelPipeline::fixedFunction() 37 { 38 current = diffuse; 39 Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000); 40 41 for(int stage = 0; stage < 8; stage++) 42 { 43 if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE) 44 { 45 break; 46 } 47 48 Vector4s texture; 49 50 if(state.textureStage[stage].usesTexture) 51 { 52 sampleTexture(texture, stage, stage); 53 } 54 55 blendTexture(temp, texture, stage); 56 } 57 58 specularPixel(current, specular); 59 } 60 applyShader(Int cMask[4])61 void PixelPipeline::applyShader(Int cMask[4]) 62 { 63 if(!shader) 64 { 65 fixedFunction(); 66 return; 67 } 68 69 int pad = 0; // Count number of texm3x3pad instructions 70 Vector4s dPairing; // Destination for first pairing instruction 71 72 for(size_t i = 0; i < shader->getLength(); i++) 73 { 74 const Shader::Instruction *instruction = shader->getInstruction(i); 75 Shader::Opcode opcode = instruction->opcode; 76 77 // #ifndef NDEBUG // FIXME: Centralize debug output control 78 // shader->printInstruction(i, "debug.txt"); 79 // #endif 80 81 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) 82 { 83 continue; 84 } 85 86 const Dst &dst = instruction->dst; 87 const Src &src0 = instruction->src[0]; 88 const Src &src1 = instruction->src[1]; 89 const Src &src2 = instruction->src[2]; 90 91 unsigned short version = shader->getVersion(); 92 bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue; // First instruction of pair 93 bool coissue = instruction->coissue; // Second instruction of pair 94 95 Vector4s d; 96 Vector4s s0; 97 Vector4s s1; 98 Vector4s s2; 99 100 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0); 101 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1); 102 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2); 103 104 Float4 x = version < 0x0104 ? v[2 + dst.index].x : v[2 + src0.index].x; 105 Float4 y = version < 0x0104 ? v[2 + dst.index].y : v[2 + src0.index].y; 106 Float4 z = version < 0x0104 ? v[2 + dst.index].z : v[2 + src0.index].z; 107 Float4 w = version < 0x0104 ? v[2 + dst.index].w : v[2 + src0.index].w; 108 109 switch(opcode) 110 { 111 case Shader::OPCODE_PS_1_0: break; 112 case Shader::OPCODE_PS_1_1: break; 113 case Shader::OPCODE_PS_1_2: break; 114 case Shader::OPCODE_PS_1_3: break; 115 case Shader::OPCODE_PS_1_4: break; 116 117 case Shader::OPCODE_DEF: break; 118 119 case Shader::OPCODE_NOP: break; 120 case Shader::OPCODE_MOV: MOV(d, s0); break; 121 case Shader::OPCODE_ADD: ADD(d, s0, s1); break; 122 case Shader::OPCODE_SUB: SUB(d, s0, s1); break; 123 case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break; 124 case Shader::OPCODE_MUL: MUL(d, s0, s1); break; 125 case Shader::OPCODE_DP3: DP3(d, s0, s1); break; 126 case Shader::OPCODE_DP4: DP4(d, s0, s1); break; 127 case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break; 128 case Shader::OPCODE_TEXCOORD: 129 if(version < 0x0104) 130 { 131 TEXCOORD(d, x, y, z, dst.index); 132 } 133 else 134 { 135 if((src0.swizzle & 0x30) == 0x20) // .xyz 136 { 137 TEXCRD(d, x, y, z, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 138 } 139 else // .xwy 140 { 141 TEXCRD(d, x, y, w, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 142 } 143 } 144 break; 145 case Shader::OPCODE_TEXKILL: 146 if(version < 0x0104) 147 { 148 TEXKILL(cMask, x, y, z); 149 } 150 else if(version == 0x0104) 151 { 152 if(dst.type == Shader::PARAMETER_TEXTURE) 153 { 154 TEXKILL(cMask, x, y, z); 155 } 156 else 157 { 158 TEXKILL(cMask, rs[dst.index]); 159 } 160 } 161 else ASSERT(false); 162 break; 163 case Shader::OPCODE_TEX: 164 if(version < 0x0104) 165 { 166 TEX(d, x, y, z, dst.index, false); 167 } 168 else if(version == 0x0104) 169 { 170 if(src0.type == Shader::PARAMETER_TEXTURE) 171 { 172 if((src0.swizzle & 0x30) == 0x20) // .xyz 173 { 174 TEX(d, x, y, z, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 175 } 176 else // .xyw 177 { 178 TEX(d, x, y, w, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 179 } 180 } 181 else 182 { 183 TEXLD(d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 184 } 185 } 186 else ASSERT(false); 187 break; 188 case Shader::OPCODE_TEXBEM: TEXBEM(d, s0, x, y, z, dst.index); break; 189 case Shader::OPCODE_TEXBEML: TEXBEML(d, s0, x, y, z, dst.index); break; 190 case Shader::OPCODE_TEXREG2AR: TEXREG2AR(d, s0, dst.index); break; 191 case Shader::OPCODE_TEXREG2GB: TEXREG2GB(d, s0, dst.index); break; 192 case Shader::OPCODE_TEXM3X2PAD: TEXM3X2PAD(x, y, z, s0, 0, src0.modifier == Shader::MODIFIER_SIGN); break; 193 case Shader::OPCODE_TEXM3X2TEX: TEXM3X2TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break; 194 case Shader::OPCODE_TEXM3X3PAD: TEXM3X3PAD(x, y, z, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN); break; 195 case Shader::OPCODE_TEXM3X3TEX: TEXM3X3TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break; 196 case Shader::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(d, x, y, z, dst.index, s0, s1); break; 197 case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(d, x, y, z, dst.index, s0); break; 198 case Shader::OPCODE_CND: CND(d, s0, s1, s2); break; 199 case Shader::OPCODE_TEXREG2RGB: TEXREG2RGB(d, s0, dst.index); break; 200 case Shader::OPCODE_TEXDP3TEX: TEXDP3TEX(d, x, y, z, dst.index, s0); break; 201 case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break; 202 case Shader::OPCODE_TEXDP3: TEXDP3(d, x, y, z, s0); break; 203 case Shader::OPCODE_TEXM3X3: TEXM3X3(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break; 204 case Shader::OPCODE_TEXDEPTH: TEXDEPTH(); break; 205 case Shader::OPCODE_CMP0: CMP(d, s0, s1, s2); break; 206 case Shader::OPCODE_BEM: BEM(d, s0, s1, dst.index); break; 207 case Shader::OPCODE_PHASE: break; 208 case Shader::OPCODE_END: break; 209 default: 210 ASSERT(false); 211 } 212 213 if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL) 214 { 215 if(dst.shift > 0) 216 { 217 if(dst.mask & 0x1) { d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x); } 218 if(dst.mask & 0x2) { d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y); } 219 if(dst.mask & 0x4) { d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z); } 220 if(dst.mask & 0x8) { d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w); } 221 } 222 else if(dst.shift < 0) 223 { 224 if(dst.mask & 0x1) d.x = d.x >> -dst.shift; 225 if(dst.mask & 0x2) d.y = d.y >> -dst.shift; 226 if(dst.mask & 0x4) d.z = d.z >> -dst.shift; 227 if(dst.mask & 0x8) d.w = d.w >> -dst.shift; 228 } 229 230 if(dst.saturate) 231 { 232 if(dst.mask & 0x1) { d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); } 233 if(dst.mask & 0x2) { d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000, 0x0000, 0x0000, 0x0000)); } 234 if(dst.mask & 0x4) { d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000, 0x0000, 0x0000, 0x0000)); } 235 if(dst.mask & 0x8) { d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000, 0x0000, 0x0000, 0x0000)); } 236 } 237 238 if(pairing) 239 { 240 if(dst.mask & 0x1) dPairing.x = d.x; 241 if(dst.mask & 0x2) dPairing.y = d.y; 242 if(dst.mask & 0x4) dPairing.z = d.z; 243 if(dst.mask & 0x8) dPairing.w = d.w; 244 } 245 246 if(coissue) 247 { 248 const Dst &dst = shader->getInstruction(i - 1)->dst; 249 250 writeDestination(dPairing, dst); 251 } 252 253 if(!pairing) 254 { 255 writeDestination(d, dst); 256 } 257 } 258 } 259 } 260 alphaTest(Int cMask[4])261 Bool PixelPipeline::alphaTest(Int cMask[4]) 262 { 263 current.x = Min(current.x, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.x = Max(current.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 264 current.y = Min(current.y, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.y = Max(current.y, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 265 current.z = Min(current.z, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.z = Max(current.z, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 266 current.w = Min(current.w, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.w = Max(current.w, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 267 268 if(!state.alphaTestActive()) 269 { 270 return true; 271 } 272 273 Int aMask; 274 275 if(state.transparencyAntialiasing == TRANSPARENCY_NONE) 276 { 277 PixelRoutine::alphaTest(aMask, current.w); 278 279 for(unsigned int q = 0; q < state.multiSample; q++) 280 { 281 cMask[q] &= aMask; 282 } 283 } 284 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 285 { 286 Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000); 287 288 alphaToCoverage(cMask, alpha); 289 } 290 else ASSERT(false); 291 292 Int pass = cMask[0]; 293 294 for(unsigned int q = 1; q < state.multiSample; q++) 295 { 296 pass = pass | cMask[q]; 297 } 298 299 return pass != 0x0; 300 } 301 rasterOperation(Float4 & fog,Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4])302 void PixelPipeline::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) 303 { 304 if(!state.colorWriteActive(0)) 305 { 306 return; 307 } 308 309 Vector4f oC; 310 311 switch(state.targetFormat[0]) 312 { 313 case FORMAT_R5G6B5: 314 case FORMAT_X8R8G8B8: 315 case FORMAT_X8B8G8R8: 316 case FORMAT_A8R8G8B8: 317 case FORMAT_A8B8G8R8: 318 case FORMAT_A8: 319 case FORMAT_G16R16: 320 case FORMAT_A16B16G16R16: 321 if(!postBlendSRGB && state.writeSRGB) 322 { 323 linearToSRGB12_16(current); 324 } 325 else 326 { 327 current.x <<= 4; 328 current.y <<= 4; 329 current.z <<= 4; 330 current.w <<= 4; 331 } 332 333 if(state.targetFormat[0] == FORMAT_R5G6B5) 334 { 335 current.x &= Short4(0xF800u); 336 current.y &= Short4(0xFC00u); 337 current.z &= Short4(0xF800u); 338 } 339 340 fogBlend(current, fog); 341 342 for(unsigned int q = 0; q < state.multiSample; q++) 343 { 344 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0])); 345 Vector4s color = current; 346 347 if(state.multiSampleMask & (1 << q)) 348 { 349 alphaBlend(0, buffer, color, x); 350 logicOperation(0, buffer, color, x); 351 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]); 352 } 353 } 354 break; 355 case FORMAT_R32F: 356 case FORMAT_G32R32F: 357 case FORMAT_X32B32G32R32F: 358 case FORMAT_A32B32G32R32F: 359 convertSigned12(oC, current); 360 PixelRoutine::fogBlend(oC, fog); 361 362 for(unsigned int q = 0; q < state.multiSample; q++) 363 { 364 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0])); 365 Vector4f color = oC; 366 367 if(state.multiSampleMask & (1 << q)) 368 { 369 alphaBlend(0, buffer, color, x); 370 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]); 371 } 372 } 373 break; 374 default: 375 ASSERT(false); 376 } 377 } 378 blendTexture(Vector4s & temp,Vector4s & texture,int stage)379 void PixelPipeline::blendTexture(Vector4s &temp, Vector4s &texture, int stage) 380 { 381 Vector4s *arg1; 382 Vector4s *arg2; 383 Vector4s *arg3; 384 Vector4s res; 385 386 Vector4s constant; 387 Vector4s tfactor; 388 389 const TextureStage::State &textureStage = state.textureStage[stage]; 390 391 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT || 392 textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT || 393 textureStage.secondArgument == TextureStage::SOURCE_CONSTANT || 394 textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT || 395 textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT || 396 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT) 397 { 398 constant.x = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[0])); 399 constant.y = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[1])); 400 constant.z = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[2])); 401 constant.w = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[3])); 402 } 403 404 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR || 405 textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR || 406 textureStage.secondArgument == TextureStage::SOURCE_TFACTOR || 407 textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR || 408 textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR || 409 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR) 410 { 411 tfactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[0])); 412 tfactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[1])); 413 tfactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[2])); 414 tfactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3])); 415 } 416 417 // Premodulate 418 if(stage > 0 && textureStage.usesTexture) 419 { 420 if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE) 421 { 422 current.x = MulHigh(current.x, texture.x) << 4; 423 current.y = MulHigh(current.y, texture.y) << 4; 424 current.z = MulHigh(current.z, texture.z) << 4; 425 } 426 427 if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE) 428 { 429 current.w = MulHigh(current.w, texture.w) << 4; 430 } 431 } 432 433 if(luminance) 434 { 435 texture.x = MulHigh(texture.x, L) << 4; 436 texture.y = MulHigh(texture.y, L) << 4; 437 texture.z = MulHigh(texture.z, L) << 4; 438 439 luminance = false; 440 } 441 442 switch(textureStage.firstArgument) 443 { 444 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break; 445 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break; 446 case TextureStage::SOURCE_CURRENT: arg1 = ¤t; break; 447 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break; 448 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break; 449 case TextureStage::SOURCE_TEMP: arg1 = &temp; break; 450 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break; 451 default: 452 ASSERT(false); 453 } 454 455 switch(textureStage.secondArgument) 456 { 457 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break; 458 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break; 459 case TextureStage::SOURCE_CURRENT: arg2 = ¤t; break; 460 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break; 461 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break; 462 case TextureStage::SOURCE_TEMP: arg2 = &temp; break; 463 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break; 464 default: 465 ASSERT(false); 466 } 467 468 switch(textureStage.thirdArgument) 469 { 470 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break; 471 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break; 472 case TextureStage::SOURCE_CURRENT: arg3 = ¤t; break; 473 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break; 474 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break; 475 case TextureStage::SOURCE_TEMP: arg3 = &temp; break; 476 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break; 477 default: 478 ASSERT(false); 479 } 480 481 Vector4s mod1; 482 Vector4s mod2; 483 Vector4s mod3; 484 485 switch(textureStage.firstModifier) 486 { 487 case TextureStage::MODIFIER_COLOR: 488 break; 489 case TextureStage::MODIFIER_INVCOLOR: 490 mod1.x = SubSat(Short4(0x1000), arg1->x); 491 mod1.y = SubSat(Short4(0x1000), arg1->y); 492 mod1.z = SubSat(Short4(0x1000), arg1->z); 493 mod1.w = SubSat(Short4(0x1000), arg1->w); 494 495 arg1 = &mod1; 496 break; 497 case TextureStage::MODIFIER_ALPHA: 498 mod1.x = arg1->w; 499 mod1.y = arg1->w; 500 mod1.z = arg1->w; 501 mod1.w = arg1->w; 502 503 arg1 = &mod1; 504 break; 505 case TextureStage::MODIFIER_INVALPHA: 506 mod1.x = SubSat(Short4(0x1000), arg1->w); 507 mod1.y = SubSat(Short4(0x1000), arg1->w); 508 mod1.z = SubSat(Short4(0x1000), arg1->w); 509 mod1.w = SubSat(Short4(0x1000), arg1->w); 510 511 arg1 = &mod1; 512 break; 513 default: 514 ASSERT(false); 515 } 516 517 switch(textureStage.secondModifier) 518 { 519 case TextureStage::MODIFIER_COLOR: 520 break; 521 case TextureStage::MODIFIER_INVCOLOR: 522 mod2.x = SubSat(Short4(0x1000), arg2->x); 523 mod2.y = SubSat(Short4(0x1000), arg2->y); 524 mod2.z = SubSat(Short4(0x1000), arg2->z); 525 mod2.w = SubSat(Short4(0x1000), arg2->w); 526 527 arg2 = &mod2; 528 break; 529 case TextureStage::MODIFIER_ALPHA: 530 mod2.x = arg2->w; 531 mod2.y = arg2->w; 532 mod2.z = arg2->w; 533 mod2.w = arg2->w; 534 535 arg2 = &mod2; 536 break; 537 case TextureStage::MODIFIER_INVALPHA: 538 mod2.x = SubSat(Short4(0x1000), arg2->w); 539 mod2.y = SubSat(Short4(0x1000), arg2->w); 540 mod2.z = SubSat(Short4(0x1000), arg2->w); 541 mod2.w = SubSat(Short4(0x1000), arg2->w); 542 543 arg2 = &mod2; 544 break; 545 default: 546 ASSERT(false); 547 } 548 549 switch(textureStage.thirdModifier) 550 { 551 case TextureStage::MODIFIER_COLOR: 552 break; 553 case TextureStage::MODIFIER_INVCOLOR: 554 mod3.x = SubSat(Short4(0x1000), arg3->x); 555 mod3.y = SubSat(Short4(0x1000), arg3->y); 556 mod3.z = SubSat(Short4(0x1000), arg3->z); 557 mod3.w = SubSat(Short4(0x1000), arg3->w); 558 559 arg3 = &mod3; 560 break; 561 case TextureStage::MODIFIER_ALPHA: 562 mod3.x = arg3->w; 563 mod3.y = arg3->w; 564 mod3.z = arg3->w; 565 mod3.w = arg3->w; 566 567 arg3 = &mod3; 568 break; 569 case TextureStage::MODIFIER_INVALPHA: 570 mod3.x = SubSat(Short4(0x1000), arg3->w); 571 mod3.y = SubSat(Short4(0x1000), arg3->w); 572 mod3.z = SubSat(Short4(0x1000), arg3->w); 573 mod3.w = SubSat(Short4(0x1000), arg3->w); 574 575 arg3 = &mod3; 576 break; 577 default: 578 ASSERT(false); 579 } 580 581 switch(textureStage.stageOperation) 582 { 583 case TextureStage::STAGE_DISABLE: 584 break; 585 case TextureStage::STAGE_SELECTARG1: // Arg1 586 res.x = arg1->x; 587 res.y = arg1->y; 588 res.z = arg1->z; 589 break; 590 case TextureStage::STAGE_SELECTARG2: // Arg2 591 res.x = arg2->x; 592 res.y = arg2->y; 593 res.z = arg2->z; 594 break; 595 case TextureStage::STAGE_SELECTARG3: // Arg3 596 res.x = arg3->x; 597 res.y = arg3->y; 598 res.z = arg3->z; 599 break; 600 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2 601 res.x = MulHigh(arg1->x, arg2->x) << 4; 602 res.y = MulHigh(arg1->y, arg2->y) << 4; 603 res.z = MulHigh(arg1->z, arg2->z) << 4; 604 break; 605 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2 606 res.x = MulHigh(arg1->x, arg2->x) << 5; 607 res.y = MulHigh(arg1->y, arg2->y) << 5; 608 res.z = MulHigh(arg1->z, arg2->z) << 5; 609 break; 610 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4 611 res.x = MulHigh(arg1->x, arg2->x) << 6; 612 res.y = MulHigh(arg1->y, arg2->y) << 6; 613 res.z = MulHigh(arg1->z, arg2->z) << 6; 614 break; 615 case TextureStage::STAGE_ADD: // Arg1 + Arg2 616 res.x = AddSat(arg1->x, arg2->x); 617 res.y = AddSat(arg1->y, arg2->y); 618 res.z = AddSat(arg1->z, arg2->z); 619 break; 620 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5 621 res.x = AddSat(arg1->x, arg2->x); 622 res.y = AddSat(arg1->y, arg2->y); 623 res.z = AddSat(arg1->z, arg2->z); 624 625 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 626 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 627 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 628 break; 629 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1 630 res.x = AddSat(arg1->x, arg2->x); 631 res.y = AddSat(arg1->y, arg2->y); 632 res.z = AddSat(arg1->z, arg2->z); 633 634 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 635 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 636 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 637 638 res.x = AddSat(res.x, res.x); 639 res.y = AddSat(res.y, res.y); 640 res.z = AddSat(res.z, res.z); 641 break; 642 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2 643 res.x = SubSat(arg1->x, arg2->x); 644 res.y = SubSat(arg1->y, arg2->y); 645 res.z = SubSat(arg1->z, arg2->z); 646 break; 647 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2 648 { 649 Short4 tmp; 650 651 tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp); 652 tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp); 653 tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp); 654 } 655 break; 656 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2 657 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x); 658 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y); 659 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z); 660 break; 661 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2 662 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x); 663 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y); 664 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z); 665 break; 666 case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5) 667 { 668 Short4 tmp; 669 670 res.x = SubSat(arg1->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.x = MulHigh(res.x, tmp); 671 res.y = SubSat(arg1->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.y = MulHigh(res.y, tmp); 672 res.z = SubSat(arg1->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.z = MulHigh(res.z, tmp); 673 674 res.x = res.x << 6; 675 res.y = res.y << 6; 676 res.z = res.z << 6; 677 678 res.x = AddSat(res.x, res.y); 679 res.x = AddSat(res.x, res.z); 680 681 // Clamp to [0, 1] 682 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 683 res.x = Min(res.x, Short4(0x1000)); 684 685 res.y = res.x; 686 res.z = res.x; 687 res.w = res.x; 688 } 689 break; 690 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2 691 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, current.w) << 4; res.x = AddSat(res.x, arg2->x); 692 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, current.w) << 4; res.y = AddSat(res.y, arg2->y); 693 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, current.w) << 4; res.z = AddSat(res.z, arg2->z); 694 break; 695 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2 696 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, diffuse.w) << 4; res.x = AddSat(res.x, arg2->x); 697 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, diffuse.w) << 4; res.y = AddSat(res.y, arg2->y); 698 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, diffuse.w) << 4; res.z = AddSat(res.z, arg2->z); 699 break; 700 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2 701 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x); 702 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y); 703 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z); 704 break; 705 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2 706 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x); 707 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y); 708 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z); 709 break; 710 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha) 711 res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x); 712 res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y); 713 res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z); 714 break; 715 case TextureStage::STAGE_PREMODULATE: 716 res.x = arg1->x; 717 res.y = arg1->y; 718 res.z = arg1->z; 719 break; 720 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2 721 res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x); 722 res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y); 723 res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z); 724 break; 725 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w 726 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w); 727 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w); 728 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w); 729 break; 730 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1 731 { 732 Short4 tmp; 733 734 res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp); 735 res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp); 736 res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp); 737 } 738 break; 739 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w 740 { 741 Short4 tmp; 742 743 res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp); 744 res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp); 745 res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp); 746 } 747 break; 748 case TextureStage::STAGE_BUMPENVMAP: 749 { 750 du = Float4(texture.x) * Float4(1.0f / 0x0FE0); 751 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0); 752 753 Float4 du2; 754 Float4 dv2; 755 756 du2 = du; 757 dv2 = dv; 758 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0])); 759 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0])); 760 du += dv2; 761 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1])); 762 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1])); 763 dv += du2; 764 765 perturbate = true; 766 767 res.x = current.x; 768 res.y = current.y; 769 res.z = current.z; 770 res.w = current.w; 771 } 772 break; 773 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 774 { 775 du = Float4(texture.x) * Float4(1.0f / 0x0FE0); 776 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0); 777 778 Float4 du2; 779 Float4 dv2; 780 781 du2 = du; 782 dv2 = dv; 783 784 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0])); 785 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0])); 786 du += dv2; 787 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1])); 788 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1])); 789 dv += du2; 790 791 perturbate = true; 792 793 L = texture.z; 794 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4))); 795 L = L << 4; 796 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4))); 797 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 798 L = Min(L, Short4(0x1000)); 799 800 luminance = true; 801 802 res.x = current.x; 803 res.y = current.y; 804 res.z = current.z; 805 res.w = current.w; 806 } 807 break; 808 default: 809 ASSERT(false); 810 } 811 812 if(textureStage.stageOperation != TextureStage::STAGE_DOT3) 813 { 814 switch(textureStage.firstArgumentAlpha) 815 { 816 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break; 817 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break; 818 case TextureStage::SOURCE_CURRENT: arg1 = ¤t; break; 819 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break; 820 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break; 821 case TextureStage::SOURCE_TEMP: arg1 = &temp; break; 822 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break; 823 default: 824 ASSERT(false); 825 } 826 827 switch(textureStage.secondArgumentAlpha) 828 { 829 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break; 830 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break; 831 case TextureStage::SOURCE_CURRENT: arg2 = ¤t; break; 832 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break; 833 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break; 834 case TextureStage::SOURCE_TEMP: arg2 = &temp; break; 835 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break; 836 default: 837 ASSERT(false); 838 } 839 840 switch(textureStage.thirdArgumentAlpha) 841 { 842 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break; 843 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break; 844 case TextureStage::SOURCE_CURRENT: arg3 = ¤t; break; 845 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break; 846 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break; 847 case TextureStage::SOURCE_TEMP: arg3 = &temp; break; 848 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break; 849 default: 850 ASSERT(false); 851 } 852 853 switch(textureStage.firstModifierAlpha) // FIXME: Check if actually used 854 { 855 case TextureStage::MODIFIER_COLOR: 856 break; 857 case TextureStage::MODIFIER_INVCOLOR: 858 mod1.w = SubSat(Short4(0x1000), arg1->w); 859 860 arg1 = &mod1; 861 break; 862 case TextureStage::MODIFIER_ALPHA: 863 // Redudant 864 break; 865 case TextureStage::MODIFIER_INVALPHA: 866 mod1.w = SubSat(Short4(0x1000), arg1->w); 867 868 arg1 = &mod1; 869 break; 870 default: 871 ASSERT(false); 872 } 873 874 switch(textureStage.secondModifierAlpha) // FIXME: Check if actually used 875 { 876 case TextureStage::MODIFIER_COLOR: 877 break; 878 case TextureStage::MODIFIER_INVCOLOR: 879 mod2.w = SubSat(Short4(0x1000), arg2->w); 880 881 arg2 = &mod2; 882 break; 883 case TextureStage::MODIFIER_ALPHA: 884 // Redudant 885 break; 886 case TextureStage::MODIFIER_INVALPHA: 887 mod2.w = SubSat(Short4(0x1000), arg2->w); 888 889 arg2 = &mod2; 890 break; 891 default: 892 ASSERT(false); 893 } 894 895 switch(textureStage.thirdModifierAlpha) // FIXME: Check if actually used 896 { 897 case TextureStage::MODIFIER_COLOR: 898 break; 899 case TextureStage::MODIFIER_INVCOLOR: 900 mod3.w = SubSat(Short4(0x1000), arg3->w); 901 902 arg3 = &mod3; 903 break; 904 case TextureStage::MODIFIER_ALPHA: 905 // Redudant 906 break; 907 case TextureStage::MODIFIER_INVALPHA: 908 mod3.w = SubSat(Short4(0x1000), arg3->w); 909 910 arg3 = &mod3; 911 break; 912 default: 913 ASSERT(false); 914 } 915 916 switch(textureStage.stageOperationAlpha) 917 { 918 case TextureStage::STAGE_DISABLE: 919 break; 920 case TextureStage::STAGE_SELECTARG1: // Arg1 921 res.w = arg1->w; 922 break; 923 case TextureStage::STAGE_SELECTARG2: // Arg2 924 res.w = arg2->w; 925 break; 926 case TextureStage::STAGE_SELECTARG3: // Arg3 927 res.w = arg3->w; 928 break; 929 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2 930 res.w = MulHigh(arg1->w, arg2->w) << 4; 931 break; 932 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2 933 res.w = MulHigh(arg1->w, arg2->w) << 5; 934 break; 935 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4 936 res.w = MulHigh(arg1->w, arg2->w) << 6; 937 break; 938 case TextureStage::STAGE_ADD: // Arg1 + Arg2 939 res.w = AddSat(arg1->w, arg2->w); 940 break; 941 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5 942 res.w = AddSat(arg1->w, arg2->w); 943 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 944 break; 945 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1 946 res.w = AddSat(arg1->w, arg2->w); 947 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 948 res.w = AddSat(res.w, res.w); 949 break; 950 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2 951 res.w = SubSat(arg1->w, arg2->w); 952 break; 953 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2 954 { 955 Short4 tmp; 956 957 tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp); 958 } 959 break; 960 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2 961 res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w); 962 break; 963 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2 964 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w); 965 break; 966 case TextureStage::STAGE_DOT3: 967 break; // Already computed in color channel 968 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2 969 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, current.w) << 4; res.w = AddSat(res.w, arg2->w); 970 break; 971 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha) 972 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, diffuse.w) << 4; res.w = AddSat(res.w, arg2->w); 973 break; 974 case TextureStage::STAGE_BLENDFACTORALPHA: 975 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w); 976 break; 977 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha) 978 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w); 979 break; 980 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha) 981 res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w); 982 break; 983 case TextureStage::STAGE_PREMODULATE: 984 res.w = arg1->w; 985 break; 986 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 987 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 988 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 989 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 990 case TextureStage::STAGE_BUMPENVMAP: 991 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 992 break; // Invalid alpha operations 993 default: 994 ASSERT(false); 995 } 996 } 997 998 // Clamp result to [0, 1] 999 1000 switch(textureStage.stageOperation) 1001 { 1002 case TextureStage::STAGE_DISABLE: 1003 case TextureStage::STAGE_SELECTARG1: 1004 case TextureStage::STAGE_SELECTARG2: 1005 case TextureStage::STAGE_SELECTARG3: 1006 case TextureStage::STAGE_MODULATE: 1007 case TextureStage::STAGE_MODULATE2X: 1008 case TextureStage::STAGE_MODULATE4X: 1009 case TextureStage::STAGE_ADD: 1010 case TextureStage::STAGE_MULTIPLYADD: 1011 case TextureStage::STAGE_LERP: 1012 case TextureStage::STAGE_BLENDCURRENTALPHA: 1013 case TextureStage::STAGE_BLENDDIFFUSEALPHA: 1014 case TextureStage::STAGE_BLENDFACTORALPHA: 1015 case TextureStage::STAGE_BLENDTEXTUREALPHA: 1016 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: 1017 case TextureStage::STAGE_DOT3: // Already clamped 1018 case TextureStage::STAGE_PREMODULATE: 1019 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 1020 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 1021 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 1022 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 1023 case TextureStage::STAGE_BUMPENVMAP: 1024 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 1025 if(state.textureStage[stage].cantUnderflow) 1026 { 1027 break; // Can't go below zero 1028 } 1029 case TextureStage::STAGE_ADDSIGNED: 1030 case TextureStage::STAGE_ADDSIGNED2X: 1031 case TextureStage::STAGE_SUBTRACT: 1032 case TextureStage::STAGE_ADDSMOOTH: 1033 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 1034 res.y = Max(res.y, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 1035 res.z = Max(res.z, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 1036 break; 1037 default: 1038 ASSERT(false); 1039 } 1040 1041 switch(textureStage.stageOperationAlpha) 1042 { 1043 case TextureStage::STAGE_DISABLE: 1044 case TextureStage::STAGE_SELECTARG1: 1045 case TextureStage::STAGE_SELECTARG2: 1046 case TextureStage::STAGE_SELECTARG3: 1047 case TextureStage::STAGE_MODULATE: 1048 case TextureStage::STAGE_MODULATE2X: 1049 case TextureStage::STAGE_MODULATE4X: 1050 case TextureStage::STAGE_ADD: 1051 case TextureStage::STAGE_MULTIPLYADD: 1052 case TextureStage::STAGE_LERP: 1053 case TextureStage::STAGE_BLENDCURRENTALPHA: 1054 case TextureStage::STAGE_BLENDDIFFUSEALPHA: 1055 case TextureStage::STAGE_BLENDFACTORALPHA: 1056 case TextureStage::STAGE_BLENDTEXTUREALPHA: 1057 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: 1058 case TextureStage::STAGE_DOT3: // Already clamped 1059 case TextureStage::STAGE_PREMODULATE: 1060 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 1061 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 1062 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 1063 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 1064 case TextureStage::STAGE_BUMPENVMAP: 1065 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 1066 if(state.textureStage[stage].cantUnderflow) 1067 { 1068 break; // Can't go below zero 1069 } 1070 case TextureStage::STAGE_ADDSIGNED: 1071 case TextureStage::STAGE_ADDSIGNED2X: 1072 case TextureStage::STAGE_SUBTRACT: 1073 case TextureStage::STAGE_ADDSMOOTH: 1074 res.w = Max(res.w, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 1075 break; 1076 default: 1077 ASSERT(false); 1078 } 1079 1080 switch(textureStage.stageOperation) 1081 { 1082 case TextureStage::STAGE_DISABLE: 1083 case TextureStage::STAGE_SELECTARG1: 1084 case TextureStage::STAGE_SELECTARG2: 1085 case TextureStage::STAGE_SELECTARG3: 1086 case TextureStage::STAGE_MODULATE: 1087 case TextureStage::STAGE_SUBTRACT: 1088 case TextureStage::STAGE_ADDSMOOTH: 1089 case TextureStage::STAGE_LERP: 1090 case TextureStage::STAGE_BLENDCURRENTALPHA: 1091 case TextureStage::STAGE_BLENDDIFFUSEALPHA: 1092 case TextureStage::STAGE_BLENDFACTORALPHA: 1093 case TextureStage::STAGE_BLENDTEXTUREALPHA: 1094 case TextureStage::STAGE_DOT3: // Already clamped 1095 case TextureStage::STAGE_PREMODULATE: 1096 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 1097 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 1098 case TextureStage::STAGE_BUMPENVMAP: 1099 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 1100 break; // Can't go above one 1101 case TextureStage::STAGE_MODULATE2X: 1102 case TextureStage::STAGE_MODULATE4X: 1103 case TextureStage::STAGE_ADD: 1104 case TextureStage::STAGE_ADDSIGNED: 1105 case TextureStage::STAGE_ADDSIGNED2X: 1106 case TextureStage::STAGE_MULTIPLYADD: 1107 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: 1108 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 1109 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 1110 res.x = Min(res.x, Short4(0x1000)); 1111 res.y = Min(res.y, Short4(0x1000)); 1112 res.z = Min(res.z, Short4(0x1000)); 1113 break; 1114 default: 1115 ASSERT(false); 1116 } 1117 1118 switch(textureStage.stageOperationAlpha) 1119 { 1120 case TextureStage::STAGE_DISABLE: 1121 case TextureStage::STAGE_SELECTARG1: 1122 case TextureStage::STAGE_SELECTARG2: 1123 case TextureStage::STAGE_SELECTARG3: 1124 case TextureStage::STAGE_MODULATE: 1125 case TextureStage::STAGE_SUBTRACT: 1126 case TextureStage::STAGE_ADDSMOOTH: 1127 case TextureStage::STAGE_LERP: 1128 case TextureStage::STAGE_BLENDCURRENTALPHA: 1129 case TextureStage::STAGE_BLENDDIFFUSEALPHA: 1130 case TextureStage::STAGE_BLENDFACTORALPHA: 1131 case TextureStage::STAGE_BLENDTEXTUREALPHA: 1132 case TextureStage::STAGE_DOT3: // Already clamped 1133 case TextureStage::STAGE_PREMODULATE: 1134 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 1135 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 1136 case TextureStage::STAGE_BUMPENVMAP: 1137 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 1138 break; // Can't go above one 1139 case TextureStage::STAGE_MODULATE2X: 1140 case TextureStage::STAGE_MODULATE4X: 1141 case TextureStage::STAGE_ADD: 1142 case TextureStage::STAGE_ADDSIGNED: 1143 case TextureStage::STAGE_ADDSIGNED2X: 1144 case TextureStage::STAGE_MULTIPLYADD: 1145 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: 1146 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 1147 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 1148 res.w = Min(res.w, Short4(0x1000)); 1149 break; 1150 default: 1151 ASSERT(false); 1152 } 1153 1154 switch(textureStage.destinationArgument) 1155 { 1156 case TextureStage::DESTINATION_CURRENT: 1157 current.x = res.x; 1158 current.y = res.y; 1159 current.z = res.z; 1160 current.w = res.w; 1161 break; 1162 case TextureStage::DESTINATION_TEMP: 1163 temp.x = res.x; 1164 temp.y = res.y; 1165 temp.z = res.z; 1166 temp.w = res.w; 1167 break; 1168 default: 1169 ASSERT(false); 1170 } 1171 } 1172 fogBlend(Vector4s & current,Float4 & f)1173 void PixelPipeline::fogBlend(Vector4s ¤t, Float4 &f) 1174 { 1175 if(!state.fogActive) 1176 { 1177 return; 1178 } 1179 1180 if(state.pixelFogMode != FOG_NONE) 1181 { 1182 pixelFog(f); 1183 } 1184 1185 UShort4 fog = convertFixed16(f, true); 1186 1187 current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog)); 1188 current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog)); 1189 current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog)); 1190 1191 UShort4 invFog = UShort4(0xFFFFu) - fog; 1192 1193 current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[0])))); 1194 current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[1])))); 1195 current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[2])))); 1196 } 1197 specularPixel(Vector4s & current,Vector4s & specular)1198 void PixelPipeline::specularPixel(Vector4s ¤t, Vector4s &specular) 1199 { 1200 if(!state.specularAdd) 1201 { 1202 return; 1203 } 1204 1205 current.x = AddSat(current.x, specular.x); 1206 current.y = AddSat(current.y, specular.y); 1207 current.z = AddSat(current.z, specular.z); 1208 } 1209 sampleTexture(Vector4s & c,int coordinates,int stage,bool project)1210 void PixelPipeline::sampleTexture(Vector4s &c, int coordinates, int stage, bool project) 1211 { 1212 Float4 x = v[2 + coordinates].x; 1213 Float4 y = v[2 + coordinates].y; 1214 Float4 z = v[2 + coordinates].z; 1215 Float4 w = v[2 + coordinates].w; 1216 1217 if(perturbate) 1218 { 1219 x += du; 1220 y += dv; 1221 1222 perturbate = false; 1223 } 1224 1225 sampleTexture(c, stage, x, y, z, w, project); 1226 } 1227 sampleTexture(Vector4s & c,int stage,Float4 & u,Float4 & v,Float4 & w,Float4 & q,bool project)1228 void PixelPipeline::sampleTexture(Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project) 1229 { 1230 #if PERF_PROFILE 1231 Long texTime = Ticks(); 1232 #endif 1233 1234 Vector4f dsx; 1235 Vector4f dsy; 1236 1237 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture); 1238 1239 if(!project) 1240 { 1241 sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy); 1242 } 1243 else 1244 { 1245 Float4 rq = reciprocal(q); 1246 1247 Float4 u_q = u * rq; 1248 Float4 v_q = v * rq; 1249 Float4 w_q = w * rq; 1250 1251 sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy); 1252 } 1253 1254 #if PERF_PROFILE 1255 cycles[PERF_TEX] += Ticks() - texTime; 1256 #endif 1257 } 1258 convertFixed12(RValue<Float4> cf)1259 Short4 PixelPipeline::convertFixed12(RValue<Float4> cf) 1260 { 1261 return RoundShort4(cf * Float4(0x1000)); 1262 } 1263 convertFixed12(Vector4s & cs,Vector4f & cf)1264 void PixelPipeline::convertFixed12(Vector4s &cs, Vector4f &cf) 1265 { 1266 cs.x = convertFixed12(cf.x); 1267 cs.y = convertFixed12(cf.y); 1268 cs.z = convertFixed12(cf.z); 1269 cs.w = convertFixed12(cf.w); 1270 } 1271 convertSigned12(Short4 & cs)1272 Float4 PixelPipeline::convertSigned12(Short4 &cs) 1273 { 1274 return Float4(cs) * Float4(1.0f / 0x0FFE); 1275 } 1276 convertSigned12(Vector4f & cf,Vector4s & cs)1277 void PixelPipeline::convertSigned12(Vector4f &cf, Vector4s &cs) 1278 { 1279 cf.x = convertSigned12(cs.x); 1280 cf.y = convertSigned12(cs.y); 1281 cf.z = convertSigned12(cs.z); 1282 cf.w = convertSigned12(cs.w); 1283 } 1284 writeDestination(Vector4s & d,const Dst & dst)1285 void PixelPipeline::writeDestination(Vector4s &d, const Dst &dst) 1286 { 1287 switch(dst.type) 1288 { 1289 case Shader::PARAMETER_TEMP: 1290 if(dst.mask & 0x1) rs[dst.index].x = d.x; 1291 if(dst.mask & 0x2) rs[dst.index].y = d.y; 1292 if(dst.mask & 0x4) rs[dst.index].z = d.z; 1293 if(dst.mask & 0x8) rs[dst.index].w = d.w; 1294 break; 1295 case Shader::PARAMETER_INPUT: 1296 if(dst.mask & 0x1) vs[dst.index].x = d.x; 1297 if(dst.mask & 0x2) vs[dst.index].y = d.y; 1298 if(dst.mask & 0x4) vs[dst.index].z = d.z; 1299 if(dst.mask & 0x8) vs[dst.index].w = d.w; 1300 break; 1301 case Shader::PARAMETER_CONST: ASSERT(false); break; 1302 case Shader::PARAMETER_TEXTURE: 1303 if(dst.mask & 0x1) ts[dst.index].x = d.x; 1304 if(dst.mask & 0x2) ts[dst.index].y = d.y; 1305 if(dst.mask & 0x4) ts[dst.index].z = d.z; 1306 if(dst.mask & 0x8) ts[dst.index].w = d.w; 1307 break; 1308 case Shader::PARAMETER_COLOROUT: 1309 if(dst.mask & 0x1) vs[dst.index].x = d.x; 1310 if(dst.mask & 0x2) vs[dst.index].y = d.y; 1311 if(dst.mask & 0x4) vs[dst.index].z = d.z; 1312 if(dst.mask & 0x8) vs[dst.index].w = d.w; 1313 break; 1314 default: 1315 ASSERT(false); 1316 } 1317 } 1318 fetchRegister(const Src & src)1319 Vector4s PixelPipeline::fetchRegister(const Src &src) 1320 { 1321 Vector4s *reg; 1322 int i = src.index; 1323 1324 Vector4s c; 1325 1326 if(src.type == Shader::PARAMETER_CONST) 1327 { 1328 c.x = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][0])); 1329 c.y = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][1])); 1330 c.z = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][2])); 1331 c.w = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][3])); 1332 } 1333 1334 switch(src.type) 1335 { 1336 case Shader::PARAMETER_TEMP: reg = &rs[i]; break; 1337 case Shader::PARAMETER_INPUT: reg = &vs[i]; break; 1338 case Shader::PARAMETER_CONST: reg = &c; break; 1339 case Shader::PARAMETER_TEXTURE: reg = &ts[i]; break; 1340 case Shader::PARAMETER_VOID: return rs[0]; // Dummy 1341 case Shader::PARAMETER_FLOAT4LITERAL: return rs[0]; // Dummy 1342 default: ASSERT(false); return rs[0]; 1343 } 1344 1345 const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3]; 1346 const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3]; 1347 const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3]; 1348 const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3]; 1349 1350 Vector4s mod; 1351 1352 switch(src.modifier) 1353 { 1354 case Shader::MODIFIER_NONE: 1355 mod.x = x; 1356 mod.y = y; 1357 mod.z = z; 1358 mod.w = w; 1359 break; 1360 case Shader::MODIFIER_BIAS: 1361 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 1362 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 1363 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 1364 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 1365 break; 1366 case Shader::MODIFIER_BIAS_NEGATE: 1367 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x); 1368 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y); 1369 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z); 1370 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w); 1371 break; 1372 case Shader::MODIFIER_COMPLEMENT: 1373 mod.x = SubSat(Short4(0x1000), x); 1374 mod.y = SubSat(Short4(0x1000), y); 1375 mod.z = SubSat(Short4(0x1000), z); 1376 mod.w = SubSat(Short4(0x1000), w); 1377 break; 1378 case Shader::MODIFIER_NEGATE: 1379 mod.x = -x; 1380 mod.y = -y; 1381 mod.z = -z; 1382 mod.w = -w; 1383 break; 1384 case Shader::MODIFIER_X2: 1385 mod.x = AddSat(x, x); 1386 mod.y = AddSat(y, y); 1387 mod.z = AddSat(z, z); 1388 mod.w = AddSat(w, w); 1389 break; 1390 case Shader::MODIFIER_X2_NEGATE: 1391 mod.x = -AddSat(x, x); 1392 mod.y = -AddSat(y, y); 1393 mod.z = -AddSat(z, z); 1394 mod.w = -AddSat(w, w); 1395 break; 1396 case Shader::MODIFIER_SIGN: 1397 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 1398 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 1399 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 1400 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); 1401 mod.x = AddSat(mod.x, mod.x); 1402 mod.y = AddSat(mod.y, mod.y); 1403 mod.z = AddSat(mod.z, mod.z); 1404 mod.w = AddSat(mod.w, mod.w); 1405 break; 1406 case Shader::MODIFIER_SIGN_NEGATE: 1407 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x); 1408 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y); 1409 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z); 1410 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w); 1411 mod.x = AddSat(mod.x, mod.x); 1412 mod.y = AddSat(mod.y, mod.y); 1413 mod.z = AddSat(mod.z, mod.z); 1414 mod.w = AddSat(mod.w, mod.w); 1415 break; 1416 case Shader::MODIFIER_DZ: 1417 mod.x = x; 1418 mod.y = y; 1419 mod.z = z; 1420 mod.w = w; 1421 // Projection performed by texture sampler 1422 break; 1423 case Shader::MODIFIER_DW: 1424 mod.x = x; 1425 mod.y = y; 1426 mod.z = z; 1427 mod.w = w; 1428 // Projection performed by texture sampler 1429 break; 1430 default: 1431 ASSERT(false); 1432 } 1433 1434 if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE)) 1435 { 1436 mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); 1437 mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); 1438 mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); 1439 mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); 1440 } 1441 1442 return mod; 1443 } 1444 MOV(Vector4s & dst,Vector4s & src0)1445 void PixelPipeline::MOV(Vector4s &dst, Vector4s &src0) 1446 { 1447 dst.x = src0.x; 1448 dst.y = src0.y; 1449 dst.z = src0.z; 1450 dst.w = src0.w; 1451 } 1452 ADD(Vector4s & dst,Vector4s & src0,Vector4s & src1)1453 void PixelPipeline::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1454 { 1455 dst.x = AddSat(src0.x, src1.x); 1456 dst.y = AddSat(src0.y, src1.y); 1457 dst.z = AddSat(src0.z, src1.z); 1458 dst.w = AddSat(src0.w, src1.w); 1459 } 1460 SUB(Vector4s & dst,Vector4s & src0,Vector4s & src1)1461 void PixelPipeline::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1462 { 1463 dst.x = SubSat(src0.x, src1.x); 1464 dst.y = SubSat(src0.y, src1.y); 1465 dst.z = SubSat(src0.z, src1.z); 1466 dst.w = SubSat(src0.w, src1.w); 1467 } 1468 MAD(Vector4s & dst,Vector4s & src0,Vector4s & src1,Vector4s & src2)1469 void PixelPipeline::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2) 1470 { 1471 // FIXME: Long fixed-point multiply fixup 1472 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); } 1473 { 1474 dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y); 1475 } 1476 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); } 1477 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); } 1478 } 1479 MUL(Vector4s & dst,Vector4s & src0,Vector4s & src1)1480 void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1481 { 1482 // FIXME: Long fixed-point multiply fixup 1483 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); } 1484 { 1485 dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); 1486 } 1487 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); } 1488 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); } 1489 } 1490 DP3(Vector4s & dst,Vector4s & src0,Vector4s & src1)1491 void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1492 { 1493 Short4 t0; 1494 Short4 t1; 1495 1496 // FIXME: Long fixed-point multiply fixup 1497 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); 1498 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1499 t0 = AddSat(t0, t1); 1500 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1501 t0 = AddSat(t0, t1); 1502 1503 dst.x = t0; 1504 dst.y = t0; 1505 dst.z = t0; 1506 dst.w = t0; 1507 } 1508 DP4(Vector4s & dst,Vector4s & src0,Vector4s & src1)1509 void PixelPipeline::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1510 { 1511 Short4 t0; 1512 Short4 t1; 1513 1514 // FIXME: Long fixed-point multiply fixup 1515 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); 1516 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1517 t0 = AddSat(t0, t1); 1518 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1519 t0 = AddSat(t0, t1); 1520 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1521 t0 = AddSat(t0, t1); 1522 1523 dst.x = t0; 1524 dst.y = t0; 1525 dst.z = t0; 1526 dst.w = t0; 1527 } 1528 LRP(Vector4s & dst,Vector4s & src0,Vector4s & src1,Vector4s & src2)1529 void PixelPipeline::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2) 1530 { 1531 // FIXME: Long fixed-point multiply fixup 1532 { dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); } 1533 { 1534 dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y); 1535 } 1536 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); } 1537 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); } 1538 } 1539 TEXCOORD(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int coordinate)1540 void PixelPipeline::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate) 1541 { 1542 Float4 uw; 1543 Float4 vw; 1544 Float4 sw; 1545 1546 if(state.interpolant[2 + coordinate].component & 0x01) 1547 { 1548 uw = Max(u, Float4(0.0f)); 1549 uw = Min(uw, Float4(1.0f)); 1550 dst.x = convertFixed12(uw); 1551 } 1552 else 1553 { 1554 dst.x = Short4(0x0000, 0x0000, 0x0000, 0x0000); 1555 } 1556 1557 if(state.interpolant[2 + coordinate].component & 0x02) 1558 { 1559 vw = Max(v, Float4(0.0f)); 1560 vw = Min(vw, Float4(1.0f)); 1561 dst.y = convertFixed12(vw); 1562 } 1563 else 1564 { 1565 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); 1566 } 1567 1568 if(state.interpolant[2 + coordinate].component & 0x04) 1569 { 1570 sw = Max(s, Float4(0.0f)); 1571 sw = Min(sw, Float4(1.0f)); 1572 dst.z = convertFixed12(sw); 1573 } 1574 else 1575 { 1576 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); 1577 } 1578 1579 dst.w = Short4(0x1000); 1580 } 1581 TEXCRD(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int coordinate,bool project)1582 void PixelPipeline::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project) 1583 { 1584 Float4 uw = u; 1585 Float4 vw = v; 1586 Float4 sw = s; 1587 1588 if(project) 1589 { 1590 uw *= Rcp_pp(s); 1591 vw *= Rcp_pp(s); 1592 } 1593 1594 if(state.interpolant[2 + coordinate].component & 0x01) 1595 { 1596 uw *= Float4(0x1000); 1597 uw = Max(uw, Float4(-0x8000)); 1598 uw = Min(uw, Float4(0x7FFF)); 1599 dst.x = RoundShort4(uw); 1600 } 1601 else 1602 { 1603 dst.x = Short4(0x0000); 1604 } 1605 1606 if(state.interpolant[2 + coordinate].component & 0x02) 1607 { 1608 vw *= Float4(0x1000); 1609 vw = Max(vw, Float4(-0x8000)); 1610 vw = Min(vw, Float4(0x7FFF)); 1611 dst.y = RoundShort4(vw); 1612 } 1613 else 1614 { 1615 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); 1616 } 1617 1618 if(state.interpolant[2 + coordinate].component & 0x04) 1619 { 1620 sw *= Float4(0x1000); 1621 sw = Max(sw, Float4(-0x8000)); 1622 sw = Min(sw, Float4(0x7FFF)); 1623 dst.z = RoundShort4(sw); 1624 } 1625 else 1626 { 1627 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); 1628 } 1629 } 1630 TEXDP3(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,Vector4s & src)1631 void PixelPipeline::TEXDP3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src) 1632 { 1633 TEXM3X3PAD(u, v, s, src, 0, false); 1634 1635 Short4 t0 = RoundShort4(u_ * Float4(0x1000)); 1636 1637 dst.x = t0; 1638 dst.y = t0; 1639 dst.z = t0; 1640 dst.w = t0; 1641 } 1642 TEXDP3TEX(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int stage,Vector4s & src0)1643 void PixelPipeline::TEXDP3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0) 1644 { 1645 TEXM3X3PAD(u, v, s, src0, 0, false); 1646 1647 v_ = Float4(0.0f); 1648 w_ = Float4(0.0f); 1649 1650 sampleTexture(dst, stage, u_, v_, w_, w_); 1651 } 1652 TEXKILL(Int cMask[4],Float4 & u,Float4 & v,Float4 & s)1653 void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s) 1654 { 1655 Int kill = SignMask(CmpNLT(u, Float4(0.0f))) & 1656 SignMask(CmpNLT(v, Float4(0.0f))) & 1657 SignMask(CmpNLT(s, Float4(0.0f))); 1658 1659 for(unsigned int q = 0; q < state.multiSample; q++) 1660 { 1661 cMask[q] &= kill; 1662 } 1663 } 1664 TEXKILL(Int cMask[4],Vector4s & src)1665 void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src) 1666 { 1667 Short4 test = src.x | src.y | src.z; 1668 Int kill = SignMask(Pack(test, test)) ^ 0x0000000F; 1669 1670 for(unsigned int q = 0; q < state.multiSample; q++) 1671 { 1672 cMask[q] &= kill; 1673 } 1674 } 1675 TEX(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int sampler,bool project)1676 void PixelPipeline::TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project) 1677 { 1678 sampleTexture(dst, sampler, u, v, s, s, project); 1679 } 1680 TEXLD(Vector4s & dst,Vector4s & src,int sampler,bool project)1681 void PixelPipeline::TEXLD(Vector4s &dst, Vector4s &src, int sampler, bool project) 1682 { 1683 Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE); 1684 Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE); 1685 Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE); 1686 1687 sampleTexture(dst, sampler, u, v, s, s, project); 1688 } 1689 TEXBEM(Vector4s & dst,Vector4s & src,Float4 & u,Float4 & v,Float4 & s,int stage)1690 void PixelPipeline::TEXBEM(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage) 1691 { 1692 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE); 1693 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE); 1694 1695 Float4 du2 = du; 1696 Float4 dv2 = dv; 1697 1698 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0])); 1699 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0])); 1700 du += dv2; 1701 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1])); 1702 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1])); 1703 dv += du2; 1704 1705 Float4 u_ = u + du; 1706 Float4 v_ = v + dv; 1707 1708 sampleTexture(dst, stage, u_, v_, s, s); 1709 } 1710 TEXBEML(Vector4s & dst,Vector4s & src,Float4 & u,Float4 & v,Float4 & s,int stage)1711 void PixelPipeline::TEXBEML(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage) 1712 { 1713 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE); 1714 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE); 1715 1716 Float4 du2 = du; 1717 Float4 dv2 = dv; 1718 1719 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0])); 1720 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0])); 1721 du += dv2; 1722 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1])); 1723 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1])); 1724 dv += du2; 1725 1726 Float4 u_ = u + du; 1727 Float4 v_ = v + dv; 1728 1729 sampleTexture(dst, stage, u_, v_, s, s); 1730 1731 Short4 L; 1732 1733 L = src.z; 1734 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4))); 1735 L = L << 4; 1736 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4))); 1737 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000)); 1738 L = Min(L, Short4(0x1000)); 1739 1740 dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4; 1741 dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4; 1742 dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4; 1743 } 1744 TEXREG2AR(Vector4s & dst,Vector4s & src0,int stage)1745 void PixelPipeline::TEXREG2AR(Vector4s &dst, Vector4s &src0, int stage) 1746 { 1747 Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE); 1748 Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE); 1749 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE); 1750 1751 sampleTexture(dst, stage, u, v, s, s); 1752 } 1753 TEXREG2GB(Vector4s & dst,Vector4s & src0,int stage)1754 void PixelPipeline::TEXREG2GB(Vector4s &dst, Vector4s &src0, int stage) 1755 { 1756 Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE); 1757 Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE); 1758 Float4 s = v; 1759 1760 sampleTexture(dst, stage, u, v, s, s); 1761 } 1762 TEXREG2RGB(Vector4s & dst,Vector4s & src0,int stage)1763 void PixelPipeline::TEXREG2RGB(Vector4s &dst, Vector4s &src0, int stage) 1764 { 1765 Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE); 1766 Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE); 1767 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE); 1768 1769 sampleTexture(dst, stage, u, v, s, s); 1770 } 1771 TEXM3X2DEPTH(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,Vector4s & src,bool signedScaling)1772 void PixelPipeline::TEXM3X2DEPTH(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling) 1773 { 1774 TEXM3X2PAD(u, v, s, src, 1, signedScaling); 1775 1776 // z / w 1777 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero 1778 1779 oDepth = u_; 1780 } 1781 TEXM3X2PAD(Float4 & u,Float4 & v,Float4 & s,Vector4s & src0,int component,bool signedScaling)1782 void PixelPipeline::TEXM3X2PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling) 1783 { 1784 TEXM3X3PAD(u, v, s, src0, component, signedScaling); 1785 } 1786 TEXM3X2TEX(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int stage,Vector4s & src0,bool signedScaling)1787 void PixelPipeline::TEXM3X2TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling) 1788 { 1789 TEXM3X2PAD(u, v, s, src0, 1, signedScaling); 1790 1791 w_ = Float4(0.0f); 1792 1793 sampleTexture(dst, stage, u_, v_, w_, w_); 1794 } 1795 TEXM3X3(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,Vector4s & src0,bool signedScaling)1796 void PixelPipeline::TEXM3X3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling) 1797 { 1798 TEXM3X3PAD(u, v, s, src0, 2, signedScaling); 1799 1800 dst.x = RoundShort4(u_ * Float4(0x1000)); 1801 dst.y = RoundShort4(v_ * Float4(0x1000)); 1802 dst.z = RoundShort4(w_ * Float4(0x1000)); 1803 dst.w = Short4(0x1000); 1804 } 1805 TEXM3X3PAD(Float4 & u,Float4 & v,Float4 & s,Vector4s & src0,int component,bool signedScaling)1806 void PixelPipeline::TEXM3X3PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling) 1807 { 1808 if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers? 1809 { 1810 U = Float4(src0.x); 1811 V = Float4(src0.y); 1812 W = Float4(src0.z); 1813 1814 previousScaling = signedScaling; 1815 } 1816 1817 Float4 x = U * u + V * v + W * s; 1818 1819 x *= Float4(1.0f / 0x1000); 1820 1821 switch(component) 1822 { 1823 case 0: u_ = x; break; 1824 case 1: v_ = x; break; 1825 case 2: w_ = x; break; 1826 default: ASSERT(false); 1827 } 1828 } 1829 TEXM3X3SPEC(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int stage,Vector4s & src0,Vector4s & src1)1830 void PixelPipeline::TEXM3X3SPEC(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1) 1831 { 1832 TEXM3X3PAD(u, v, s, src0, 2, false); 1833 1834 Float4 E[3]; // Eye vector 1835 1836 E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE); 1837 E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE); 1838 E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE); 1839 1840 // Reflection 1841 Float4 u__; 1842 Float4 v__; 1843 Float4 w__; 1844 1845 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N) 1846 u__ = u_ * E[0]; 1847 v__ = v_ * E[1]; 1848 w__ = w_ * E[2]; 1849 u__ += v__ + w__; 1850 u__ += u__; 1851 v__ = u__; 1852 w__ = u__; 1853 u__ *= u_; 1854 v__ *= v_; 1855 w__ *= w_; 1856 u_ *= u_; 1857 v_ *= v_; 1858 w_ *= w_; 1859 u_ += v_ + w_; 1860 u__ -= E[0] * u_; 1861 v__ -= E[1] * u_; 1862 w__ -= E[2] * u_; 1863 1864 sampleTexture(dst, stage, u__, v__, w__, w__); 1865 } 1866 TEXM3X3TEX(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int stage,Vector4s & src0,bool signedScaling)1867 void PixelPipeline::TEXM3X3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling) 1868 { 1869 TEXM3X3PAD(u, v, s, src0, 2, signedScaling); 1870 1871 sampleTexture(dst, stage, u_, v_, w_, w_); 1872 } 1873 TEXM3X3VSPEC(Vector4s & dst,Float4 & x,Float4 & y,Float4 & z,int stage,Vector4s & src0)1874 void PixelPipeline::TEXM3X3VSPEC(Vector4s &dst, Float4 &x, Float4 &y, Float4 &z, int stage, Vector4s &src0) 1875 { 1876 TEXM3X3PAD(x, y, z, src0, 2, false); 1877 1878 Float4 E[3]; // Eye vector 1879 1880 E[0] = v[2 + stage - 2].w; 1881 E[1] = v[2 + stage - 1].w; 1882 E[2] = v[2 + stage - 0].w; 1883 1884 // Reflection 1885 Float4 u__; 1886 Float4 v__; 1887 Float4 w__; 1888 1889 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N) 1890 u__ = u_ * E[0]; 1891 v__ = v_ * E[1]; 1892 w__ = w_ * E[2]; 1893 u__ += v__ + w__; 1894 u__ += u__; 1895 v__ = u__; 1896 w__ = u__; 1897 u__ *= u_; 1898 v__ *= v_; 1899 w__ *= w_; 1900 u_ *= u_; 1901 v_ *= v_; 1902 w_ *= w_; 1903 u_ += v_ + w_; 1904 u__ -= E[0] * u_; 1905 v__ -= E[1] * u_; 1906 w__ -= E[2] * u_; 1907 1908 sampleTexture(dst, stage, u__, v__, w__, w__); 1909 } 1910 TEXDEPTH()1911 void PixelPipeline::TEXDEPTH() 1912 { 1913 u_ = Float4(rs[5].x); 1914 v_ = Float4(rs[5].y); 1915 1916 // z / w 1917 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero 1918 1919 oDepth = u_; 1920 } 1921 CND(Vector4s & dst,Vector4s & src0,Vector4s & src1,Vector4s & src2)1922 void PixelPipeline::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2) 1923 { 1924 { Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0; }; 1925 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0; }; 1926 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0; }; 1927 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0; }; 1928 } 1929 CMP(Vector4s & dst,Vector4s & src0,Vector4s & src1,Vector4s & src2)1930 void PixelPipeline::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2) 1931 { 1932 { Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0; }; 1933 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0; }; 1934 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0; }; 1935 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0; }; 1936 } 1937 BEM(Vector4s & dst,Vector4s & src0,Vector4s & src1,int stage)1938 void PixelPipeline::BEM(Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage) 1939 { 1940 Short4 t0; 1941 Short4 t1; 1942 1943 // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y 1944 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard. 1945 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard. 1946 t0 = AddSat(t0, t1); 1947 t0 = AddSat(t0, src0.x); 1948 dst.x = t0; 1949 1950 // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y 1951 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard. 1952 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard. 1953 t0 = AddSat(t0, t1); 1954 t0 = AddSat(t0, src0.y); 1955 dst.y = t0; 1956 } 1957 } 1958 1959