1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelProgram.hpp" 16 17 #include "SamplerCore.hpp" 18 #include "Renderer/Primitive.hpp" 19 #include "Renderer/Renderer.hpp" 20 21 namespace sw 22 { 23 extern bool postBlendSRGB; 24 extern bool booleanFaceRegister; 25 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 26 extern bool fullPixelPositionRegister; 27 setBuiltins(Int & x,Int & y,Float4 (& z)[4],Float4 & w)28 void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) 29 { 30 if(shader->getShaderModel() >= 0x0300) 31 { 32 if(shader->isVPosDeclared()) 33 { 34 if(!halfIntegerCoordinates) 35 { 36 vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1); 37 vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1); 38 } 39 else 40 { 41 vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f); 42 vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f); 43 } 44 45 if(fullPixelPositionRegister) 46 { 47 vPos.z = z[0]; // FIXME: Centroid? 48 vPos.w = w; // FIXME: Centroid? 49 } 50 } 51 52 if(shader->isVFaceDeclared()) 53 { 54 Float4 face = *Pointer<Float>(primitive + OFFSET(Primitive, area)); 55 56 if(booleanFaceRegister) 57 { 58 face = As<Float4>(state.frontFaceCCW ? CmpNLT(face, Float4(0.0f)) : CmpLT(face, Float4(0.0f))); 59 } 60 61 vFace.x = face; 62 vFace.y = face; 63 vFace.z = face; 64 vFace.w = face; 65 } 66 } 67 } 68 applyShader(Int cMask[4])69 void PixelProgram::applyShader(Int cMask[4]) 70 { 71 enableIndex = 0; 72 stackIndex = 0; 73 74 if(shader->containsLeaveInstruction()) 75 { 76 enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 77 } 78 79 for(int i = 0; i < RENDERTARGETS; i++) 80 { 81 if(state.targetFormat[i] != FORMAT_NULL) 82 { 83 oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f); 84 } 85 } 86 87 // Create all call site return blocks up front 88 for(size_t i = 0; i < shader->getLength(); i++) 89 { 90 const Shader::Instruction *instruction = shader->getInstruction(i); 91 Shader::Opcode opcode = instruction->opcode; 92 93 if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ) 94 { 95 const Dst &dst = instruction->dst; 96 97 ASSERT(callRetBlock[dst.label].size() == dst.callSite); 98 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock()); 99 } 100 } 101 102 bool broadcastColor0 = true; 103 104 for(size_t i = 0; i < shader->getLength(); i++) 105 { 106 const Shader::Instruction *instruction = shader->getInstruction(i); 107 Shader::Opcode opcode = instruction->opcode; 108 109 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) 110 { 111 continue; 112 } 113 114 const Dst &dst = instruction->dst; 115 const Src &src0 = instruction->src[0]; 116 const Src &src1 = instruction->src[1]; 117 const Src &src2 = instruction->src[2]; 118 const Src &src3 = instruction->src[3]; 119 const Src &src4 = instruction->src[4]; 120 121 bool predicate = instruction->predicate; 122 Control control = instruction->control; 123 bool pp = dst.partialPrecision; 124 bool project = instruction->project; 125 bool bias = instruction->bias; 126 127 Vector4f d; 128 Vector4f s0; 129 Vector4f s1; 130 Vector4f s2; 131 Vector4f s3; 132 Vector4f s4; 133 134 if(opcode == Shader::OPCODE_TEXKILL) // Takes destination as input 135 { 136 if(dst.type == Shader::PARAMETER_TEXTURE) 137 { 138 d.x = v[2 + dst.index].x; 139 d.y = v[2 + dst.index].y; 140 d.z = v[2 + dst.index].z; 141 d.w = v[2 + dst.index].w; 142 } 143 else 144 { 145 d = r[dst.index]; 146 } 147 } 148 149 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0); 150 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1); 151 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2); 152 if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3); 153 if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4); 154 155 switch(opcode) 156 { 157 case Shader::OPCODE_PS_2_0: break; 158 case Shader::OPCODE_PS_2_x: break; 159 case Shader::OPCODE_PS_3_0: break; 160 case Shader::OPCODE_DEF: break; 161 case Shader::OPCODE_DCL: break; 162 case Shader::OPCODE_NOP: break; 163 case Shader::OPCODE_MOV: mov(d, s0); break; 164 case Shader::OPCODE_NEG: neg(d, s0); break; 165 case Shader::OPCODE_INEG: ineg(d, s0); break; 166 case Shader::OPCODE_F2B: f2b(d, s0); break; 167 case Shader::OPCODE_B2F: b2f(d, s0); break; 168 case Shader::OPCODE_F2I: f2i(d, s0); break; 169 case Shader::OPCODE_I2F: i2f(d, s0); break; 170 case Shader::OPCODE_F2U: f2u(d, s0); break; 171 case Shader::OPCODE_U2F: u2f(d, s0); break; 172 case Shader::OPCODE_I2B: i2b(d, s0); break; 173 case Shader::OPCODE_B2I: b2i(d, s0); break; 174 case Shader::OPCODE_ADD: add(d, s0, s1); break; 175 case Shader::OPCODE_IADD: iadd(d, s0, s1); break; 176 case Shader::OPCODE_SUB: sub(d, s0, s1); break; 177 case Shader::OPCODE_ISUB: isub(d, s0, s1); break; 178 case Shader::OPCODE_MUL: mul(d, s0, s1); break; 179 case Shader::OPCODE_IMUL: imul(d, s0, s1); break; 180 case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break; 181 case Shader::OPCODE_IMAD: imad(d, s0, s1, s2); break; 182 case Shader::OPCODE_DP1: dp1(d, s0, s1); break; 183 case Shader::OPCODE_DP2: dp2(d, s0, s1); break; 184 case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break; 185 case Shader::OPCODE_DP3: dp3(d, s0, s1); break; 186 case Shader::OPCODE_DP4: dp4(d, s0, s1); break; 187 case Shader::OPCODE_DET2: det2(d, s0, s1); break; 188 case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break; 189 case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break; 190 case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break; 191 case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; 192 case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break; 193 case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break; 194 case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break; 195 case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break; 196 case Shader::OPCODE_FRC: frc(d, s0); break; 197 case Shader::OPCODE_TRUNC: trunc(d, s0); break; 198 case Shader::OPCODE_FLOOR: floor(d, s0); break; 199 case Shader::OPCODE_ROUND: round(d, s0); break; 200 case Shader::OPCODE_ROUNDEVEN: roundEven(d, s0); break; 201 case Shader::OPCODE_CEIL: ceil(d, s0); break; 202 case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; 203 case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; 204 case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break; 205 case Shader::OPCODE_LOG2: log2(d, s0, pp); break; 206 case Shader::OPCODE_EXP: exp(d, s0, pp); break; 207 case Shader::OPCODE_LOG: log(d, s0, pp); break; 208 case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break; 209 case Shader::OPCODE_DIV: div(d, s0, s1); break; 210 case Shader::OPCODE_IDIV: idiv(d, s0, s1); break; 211 case Shader::OPCODE_UDIV: udiv(d, s0, s1); break; 212 case Shader::OPCODE_MOD: mod(d, s0, s1); break; 213 case Shader::OPCODE_IMOD: imod(d, s0, s1); break; 214 case Shader::OPCODE_UMOD: umod(d, s0, s1); break; 215 case Shader::OPCODE_SHL: shl(d, s0, s1); break; 216 case Shader::OPCODE_ISHR: ishr(d, s0, s1); break; 217 case Shader::OPCODE_USHR: ushr(d, s0, s1); break; 218 case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break; 219 case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break; 220 case Shader::OPCODE_RSQ: rsq(d, s0, pp); break; 221 case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break; 222 case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break; 223 case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break; 224 case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); break; 225 case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break; 226 case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break; 227 case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break; 228 case Shader::OPCODE_MIN: min(d, s0, s1); break; 229 case Shader::OPCODE_IMIN: imin(d, s0, s1); break; 230 case Shader::OPCODE_UMIN: umin(d, s0, s1); break; 231 case Shader::OPCODE_MAX: max(d, s0, s1); break; 232 case Shader::OPCODE_IMAX: imax(d, s0, s1); break; 233 case Shader::OPCODE_UMAX: umax(d, s0, s1); break; 234 case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break; 235 case Shader::OPCODE_STEP: step(d, s0, s1); break; 236 case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break; 237 case Shader::OPCODE_ISINF: isinf(d, s0); break; 238 case Shader::OPCODE_ISNAN: isnan(d, s0); break; 239 case Shader::OPCODE_FLOATBITSTOINT: 240 case Shader::OPCODE_FLOATBITSTOUINT: 241 case Shader::OPCODE_INTBITSTOFLOAT: 242 case Shader::OPCODE_UINTBITSTOFLOAT: d = s0; break; 243 case Shader::OPCODE_PACKSNORM2x16: packSnorm2x16(d, s0); break; 244 case Shader::OPCODE_PACKUNORM2x16: packUnorm2x16(d, s0); break; 245 case Shader::OPCODE_PACKHALF2x16: packHalf2x16(d, s0); break; 246 case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0); break; 247 case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0); break; 248 case Shader::OPCODE_UNPACKHALF2x16: unpackHalf2x16(d, s0); break; 249 case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break; 250 case Shader::OPCODE_POW: pow(d, s0, s1, pp); break; 251 case Shader::OPCODE_SGN: sgn(d, s0); break; 252 case Shader::OPCODE_ISGN: isgn(d, s0); break; 253 case Shader::OPCODE_CRS: crs(d, s0, s1); break; 254 case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break; 255 case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break; 256 case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break; 257 case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, s2); break; 258 case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break; 259 case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break; 260 case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break; 261 case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break; 262 case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break; 263 case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break; 264 case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break; 265 case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break; 266 case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break; 267 case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break; 268 case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break; 269 case Shader::OPCODE_ABS: abs(d, s0); break; 270 case Shader::OPCODE_IABS: iabs(d, s0); break; 271 case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break; 272 case Shader::OPCODE_COS: cos(d, s0, pp); break; 273 case Shader::OPCODE_SIN: sin(d, s0, pp); break; 274 case Shader::OPCODE_TAN: tan(d, s0, pp); break; 275 case Shader::OPCODE_ACOS: acos(d, s0, pp); break; 276 case Shader::OPCODE_ASIN: asin(d, s0, pp); break; 277 case Shader::OPCODE_ATAN: atan(d, s0, pp); break; 278 case Shader::OPCODE_ATAN2: atan2(d, s0, s1, pp); break; 279 case Shader::OPCODE_COSH: cosh(d, s0, pp); break; 280 case Shader::OPCODE_SINH: sinh(d, s0, pp); break; 281 case Shader::OPCODE_TANH: tanh(d, s0, pp); break; 282 case Shader::OPCODE_ACOSH: acosh(d, s0, pp); break; 283 case Shader::OPCODE_ASINH: asinh(d, s0, pp); break; 284 case Shader::OPCODE_ATANH: atanh(d, s0, pp); break; 285 case Shader::OPCODE_M4X4: M4X4(d, s0, src1); break; 286 case Shader::OPCODE_M4X3: M4X3(d, s0, src1); break; 287 case Shader::OPCODE_M3X4: M3X4(d, s0, src1); break; 288 case Shader::OPCODE_M3X3: M3X3(d, s0, src1); break; 289 case Shader::OPCODE_M3X2: M3X2(d, s0, src1); break; 290 case Shader::OPCODE_TEX: TEX(d, s0, src1, project, bias); break; 291 case Shader::OPCODE_TEXLDD: TEXGRAD(d, s0, src1, s2, s3); break; 292 case Shader::OPCODE_TEXLDL: TEXLOD(d, s0, src1, s0.w); break; 293 case Shader::OPCODE_TEXLOD: TEXLOD(d, s0, src1, s2.x); break; 294 case Shader::OPCODE_TEXSIZE: TEXSIZE(d, s0.x, src1); break; 295 case Shader::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break; 296 case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2); break; 297 case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break; 298 case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x); break; 299 case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break; 300 case Shader::OPCODE_TEXGRAD: TEXGRAD(d, s0, src1, s2, s3); break; 301 case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break; 302 case Shader::OPCODE_TEXBIAS: TEXBIAS(d, s0, src1, s2.x); break; 303 case Shader::OPCODE_TEXOFFSETBIAS: TEXOFFSETBIAS(d, s0, src1, s2, s3.x); break; 304 case Shader::OPCODE_DISCARD: DISCARD(cMask, instruction); break; 305 case Shader::OPCODE_DFDX: DFDX(d, s0); break; 306 case Shader::OPCODE_DFDY: DFDY(d, s0); break; 307 case Shader::OPCODE_FWIDTH: FWIDTH(d, s0); break; 308 case Shader::OPCODE_BREAK: BREAK(); break; 309 case Shader::OPCODE_BREAKC: BREAKC(s0, s1, control); break; 310 case Shader::OPCODE_BREAKP: BREAKP(src0); break; 311 case Shader::OPCODE_CONTINUE: CONTINUE(); break; 312 case Shader::OPCODE_TEST: TEST(); break; 313 case Shader::OPCODE_SCALAR: SCALAR(); break; 314 case Shader::OPCODE_CALL: CALL(dst.label, dst.callSite); break; 315 case Shader::OPCODE_CALLNZ: CALLNZ(dst.label, dst.callSite, src0); break; 316 case Shader::OPCODE_ELSE: ELSE(); break; 317 case Shader::OPCODE_ENDIF: ENDIF(); break; 318 case Shader::OPCODE_ENDLOOP: ENDLOOP(); break; 319 case Shader::OPCODE_ENDREP: ENDREP(); break; 320 case Shader::OPCODE_ENDWHILE: ENDWHILE(); break; 321 case Shader::OPCODE_ENDSWITCH: ENDSWITCH(); break; 322 case Shader::OPCODE_IF: IF(src0); break; 323 case Shader::OPCODE_IFC: IFC(s0, s1, control); break; 324 case Shader::OPCODE_LABEL: LABEL(dst.index); break; 325 case Shader::OPCODE_LOOP: LOOP(src1); break; 326 case Shader::OPCODE_REP: REP(src0); break; 327 case Shader::OPCODE_WHILE: WHILE(src0); break; 328 case Shader::OPCODE_SWITCH: SWITCH(); break; 329 case Shader::OPCODE_RET: RET(); break; 330 case Shader::OPCODE_LEAVE: LEAVE(); break; 331 case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break; 332 case Shader::OPCODE_ALL: all(d.x, s0); break; 333 case Shader::OPCODE_ANY: any(d.x, s0); break; 334 case Shader::OPCODE_NOT: bitwise_not(d, s0); break; 335 case Shader::OPCODE_OR: bitwise_or(d, s0, s1); break; 336 case Shader::OPCODE_XOR: bitwise_xor(d, s0, s1); break; 337 case Shader::OPCODE_AND: bitwise_and(d, s0, s1); break; 338 case Shader::OPCODE_EQ: equal(d, s0, s1); break; 339 case Shader::OPCODE_NE: notEqual(d, s0, s1); break; 340 case Shader::OPCODE_END: break; 341 default: 342 ASSERT(false); 343 } 344 345 if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP) 346 { 347 if(dst.saturate) 348 { 349 if(dst.x) d.x = Max(d.x, Float4(0.0f)); 350 if(dst.y) d.y = Max(d.y, Float4(0.0f)); 351 if(dst.z) d.z = Max(d.z, Float4(0.0f)); 352 if(dst.w) d.w = Max(d.w, Float4(0.0f)); 353 354 if(dst.x) d.x = Min(d.x, Float4(1.0f)); 355 if(dst.y) d.y = Min(d.y, Float4(1.0f)); 356 if(dst.z) d.z = Min(d.z, Float4(1.0f)); 357 if(dst.w) d.w = Min(d.w, Float4(1.0f)); 358 } 359 360 if(instruction->isPredicated()) 361 { 362 Vector4f pDst; // FIXME: Rename 363 364 switch(dst.type) 365 { 366 case Shader::PARAMETER_TEMP: 367 if(dst.rel.type == Shader::PARAMETER_VOID) 368 { 369 if(dst.x) pDst.x = r[dst.index].x; 370 if(dst.y) pDst.y = r[dst.index].y; 371 if(dst.z) pDst.z = r[dst.index].z; 372 if(dst.w) pDst.w = r[dst.index].w; 373 } 374 else if(!dst.rel.dynamic) 375 { 376 Int a = dst.index + relativeAddress(dst.rel); 377 378 if(dst.x) pDst.x = r[a].x; 379 if(dst.y) pDst.y = r[a].y; 380 if(dst.z) pDst.z = r[a].z; 381 if(dst.w) pDst.w = r[a].w; 382 } 383 else 384 { 385 Int4 a = dst.index + dynamicAddress(dst.rel); 386 387 if(dst.x) pDst.x = r[a].x; 388 if(dst.y) pDst.y = r[a].y; 389 if(dst.z) pDst.z = r[a].z; 390 if(dst.w) pDst.w = r[a].w; 391 } 392 break; 393 case Shader::PARAMETER_COLOROUT: 394 if(dst.rel.type == Shader::PARAMETER_VOID) 395 { 396 if(dst.x) pDst.x = oC[dst.index].x; 397 if(dst.y) pDst.y = oC[dst.index].y; 398 if(dst.z) pDst.z = oC[dst.index].z; 399 if(dst.w) pDst.w = oC[dst.index].w; 400 } 401 else if(!dst.rel.dynamic) 402 { 403 Int a = dst.index + relativeAddress(dst.rel); 404 405 if(dst.x) pDst.x = oC[a].x; 406 if(dst.y) pDst.y = oC[a].y; 407 if(dst.z) pDst.z = oC[a].z; 408 if(dst.w) pDst.w = oC[a].w; 409 } 410 else 411 { 412 Int4 a = dst.index + dynamicAddress(dst.rel); 413 414 if(dst.x) pDst.x = oC[a].x; 415 if(dst.y) pDst.y = oC[a].y; 416 if(dst.z) pDst.z = oC[a].z; 417 if(dst.w) pDst.w = oC[a].w; 418 } 419 break; 420 case Shader::PARAMETER_PREDICATE: 421 if(dst.x) pDst.x = p0.x; 422 if(dst.y) pDst.y = p0.y; 423 if(dst.z) pDst.z = p0.z; 424 if(dst.w) pDst.w = p0.w; 425 break; 426 case Shader::PARAMETER_DEPTHOUT: 427 pDst.x = oDepth; 428 break; 429 default: 430 ASSERT(false); 431 } 432 433 Int4 enable = enableMask(instruction); 434 435 Int4 xEnable = enable; 436 Int4 yEnable = enable; 437 Int4 zEnable = enable; 438 Int4 wEnable = enable; 439 440 if(predicate) 441 { 442 unsigned char pSwizzle = instruction->predicateSwizzle; 443 444 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03]; 445 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03]; 446 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03]; 447 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03]; 448 449 if(!instruction->predicateNot) 450 { 451 if(dst.x) xEnable = xEnable & As<Int4>(xPredicate); 452 if(dst.y) yEnable = yEnable & As<Int4>(yPredicate); 453 if(dst.z) zEnable = zEnable & As<Int4>(zPredicate); 454 if(dst.w) wEnable = wEnable & As<Int4>(wPredicate); 455 } 456 else 457 { 458 if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate); 459 if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate); 460 if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate); 461 if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate); 462 } 463 } 464 465 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); 466 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); 467 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable); 468 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); 469 470 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); 471 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); 472 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); 473 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); 474 } 475 476 switch(dst.type) 477 { 478 case Shader::PARAMETER_TEMP: 479 if(dst.rel.type == Shader::PARAMETER_VOID) 480 { 481 if(dst.x) r[dst.index].x = d.x; 482 if(dst.y) r[dst.index].y = d.y; 483 if(dst.z) r[dst.index].z = d.z; 484 if(dst.w) r[dst.index].w = d.w; 485 } 486 else if(!dst.rel.dynamic) 487 { 488 Int a = dst.index + relativeAddress(dst.rel); 489 490 if(dst.x) r[a].x = d.x; 491 if(dst.y) r[a].y = d.y; 492 if(dst.z) r[a].z = d.z; 493 if(dst.w) r[a].w = d.w; 494 } 495 else 496 { 497 Int4 a = dst.index + dynamicAddress(dst.rel); 498 499 if(dst.x) r.scatter_x(a, d.x); 500 if(dst.y) r.scatter_y(a, d.y); 501 if(dst.z) r.scatter_z(a, d.z); 502 if(dst.w) r.scatter_w(a, d.w); 503 } 504 break; 505 case Shader::PARAMETER_COLOROUT: 506 if(dst.rel.type == Shader::PARAMETER_VOID) 507 { 508 broadcastColor0 = (dst.index == 0) && broadcastColor0; 509 510 if(dst.x) oC[dst.index].x = d.x; 511 if(dst.y) oC[dst.index].y = d.y; 512 if(dst.z) oC[dst.index].z = d.z; 513 if(dst.w) oC[dst.index].w = d.w; 514 } 515 else if(!dst.rel.dynamic) 516 { 517 broadcastColor0 = false; 518 Int a = dst.index + relativeAddress(dst.rel); 519 520 if(dst.x) oC[a].x = d.x; 521 if(dst.y) oC[a].y = d.y; 522 if(dst.z) oC[a].z = d.z; 523 if(dst.w) oC[a].w = d.w; 524 } 525 else 526 { 527 broadcastColor0 = false; 528 Int4 a = dst.index + dynamicAddress(dst.rel); 529 530 if(dst.x) oC.scatter_x(a, d.x); 531 if(dst.y) oC.scatter_y(a, d.y); 532 if(dst.z) oC.scatter_z(a, d.z); 533 if(dst.w) oC.scatter_w(a, d.w); 534 } 535 break; 536 case Shader::PARAMETER_PREDICATE: 537 if(dst.x) p0.x = d.x; 538 if(dst.y) p0.y = d.y; 539 if(dst.z) p0.z = d.z; 540 if(dst.w) p0.w = d.w; 541 break; 542 case Shader::PARAMETER_DEPTHOUT: 543 oDepth = d.x; 544 break; 545 default: 546 ASSERT(false); 547 } 548 } 549 } 550 551 if(currentLabel != -1) 552 { 553 Nucleus::setInsertBlock(returnBlock); 554 } 555 556 if(broadcastColor0) 557 { 558 for(int i = 0; i < RENDERTARGETS; i++) 559 { 560 c[i] = oC[0]; 561 } 562 } 563 else 564 { 565 for(int i = 0; i < RENDERTARGETS; i++) 566 { 567 c[i] = oC[i]; 568 } 569 } 570 571 clampColor(c); 572 573 if(state.depthOverride) 574 { 575 oDepth = Min(Max(oDepth, Float4(0.0f)), Float4(1.0f)); 576 } 577 } 578 alphaTest(Int cMask[4])579 Bool PixelProgram::alphaTest(Int cMask[4]) 580 { 581 if(!state.alphaTestActive()) 582 { 583 return true; 584 } 585 586 Int aMask; 587 588 if(state.transparencyAntialiasing == TRANSPARENCY_NONE) 589 { 590 Short4 alpha = RoundShort4(c[0].w * Float4(0x1000)); 591 592 PixelRoutine::alphaTest(aMask, alpha); 593 594 for(unsigned int q = 0; q < state.multiSample; q++) 595 { 596 cMask[q] &= aMask; 597 } 598 } 599 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 600 { 601 alphaToCoverage(cMask, c[0].w); 602 } 603 else ASSERT(false); 604 605 Int pass = cMask[0]; 606 607 for(unsigned int q = 1; q < state.multiSample; q++) 608 { 609 pass = pass | cMask[q]; 610 } 611 612 return pass != 0x0; 613 } 614 rasterOperation(Float4 & fog,Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4])615 void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) 616 { 617 for(int index = 0; index < RENDERTARGETS; index++) 618 { 619 if(!state.colorWriteActive(index)) 620 { 621 continue; 622 } 623 624 if(!postBlendSRGB && state.writeSRGB && !isSRGB(index)) 625 { 626 c[index].x = linearToSRGB(c[index].x); 627 c[index].y = linearToSRGB(c[index].y); 628 c[index].z = linearToSRGB(c[index].z); 629 } 630 631 if(index == 0) 632 { 633 fogBlend(c[index], fog); 634 } 635 636 switch(state.targetFormat[index]) 637 { 638 case FORMAT_R5G6B5: 639 case FORMAT_X8R8G8B8: 640 case FORMAT_X8B8G8R8: 641 case FORMAT_A8R8G8B8: 642 case FORMAT_A8B8G8R8: 643 case FORMAT_SRGB8_X8: 644 case FORMAT_SRGB8_A8: 645 case FORMAT_G8R8: 646 case FORMAT_R8: 647 case FORMAT_A8: 648 case FORMAT_G16R16: 649 case FORMAT_A16B16G16R16: 650 for(unsigned int q = 0; q < state.multiSample; q++) 651 { 652 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 653 Vector4s color; 654 655 if(state.targetFormat[index] == FORMAT_R5G6B5) 656 { 657 color.x = UShort4(c[index].x * Float4(0xFBFF), false); 658 color.y = UShort4(c[index].y * Float4(0xFDFF), false); 659 color.z = UShort4(c[index].z * Float4(0xFBFF), false); 660 color.w = UShort4(c[index].w * Float4(0xFFFF), false); 661 } 662 else 663 { 664 color.x = convertFixed16(c[index].x, false); 665 color.y = convertFixed16(c[index].y, false); 666 color.z = convertFixed16(c[index].z, false); 667 color.w = convertFixed16(c[index].w, false); 668 } 669 670 if(state.multiSampleMask & (1 << q)) 671 { 672 alphaBlend(index, buffer, color, x); 673 logicOperation(index, buffer, color, x); 674 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 675 } 676 } 677 break; 678 case FORMAT_R32F: 679 case FORMAT_G32R32F: 680 case FORMAT_X32B32G32R32F: 681 case FORMAT_A32B32G32R32F: 682 case FORMAT_X32B32G32R32F_UNSIGNED: 683 case FORMAT_R32I: 684 case FORMAT_G32R32I: 685 case FORMAT_A32B32G32R32I: 686 case FORMAT_R32UI: 687 case FORMAT_G32R32UI: 688 case FORMAT_A32B32G32R32UI: 689 case FORMAT_R16I: 690 case FORMAT_G16R16I: 691 case FORMAT_A16B16G16R16I: 692 case FORMAT_R16UI: 693 case FORMAT_G16R16UI: 694 case FORMAT_A16B16G16R16UI: 695 case FORMAT_R8I: 696 case FORMAT_G8R8I: 697 case FORMAT_A8B8G8R8I: 698 case FORMAT_R8UI: 699 case FORMAT_G8R8UI: 700 case FORMAT_A8B8G8R8UI: 701 for(unsigned int q = 0; q < state.multiSample; q++) 702 { 703 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 704 Vector4f color = c[index]; 705 706 if(state.multiSampleMask & (1 << q)) 707 { 708 alphaBlend(index, buffer, color, x); 709 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 710 } 711 } 712 break; 713 default: 714 ASSERT(false); 715 } 716 } 717 } 718 sampleTexture(const Src & sampler,Vector4f & uvwq,Float4 & bias,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)719 Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 720 { 721 Vector4f tmp; 722 723 if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID) 724 { 725 tmp = sampleTexture(sampler.index, uvwq, bias, dsx, dsy, offset, function); 726 } 727 else 728 { 729 Int index = As<Int>(Float(fetchRegister(sampler).x.x)); 730 731 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) 732 { 733 if(shader->usesSampler(i)) 734 { 735 If(index == i) 736 { 737 tmp = sampleTexture(i, uvwq, bias, dsx, dsy, offset, function); 738 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture 739 } 740 } 741 } 742 } 743 744 Vector4f c; 745 c.x = tmp[(sampler.swizzle >> 0) & 0x3]; 746 c.y = tmp[(sampler.swizzle >> 2) & 0x3]; 747 c.z = tmp[(sampler.swizzle >> 4) & 0x3]; 748 c.w = tmp[(sampler.swizzle >> 6) & 0x3]; 749 750 return c; 751 } 752 sampleTexture(int samplerIndex,Vector4f & uvwq,Float4 & bias,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)753 Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 754 { 755 #if PERF_PROFILE 756 Long texTime = Ticks(); 757 #endif 758 759 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture); 760 Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, bias, dsx, dsy, offset, function); 761 762 #if PERF_PROFILE 763 cycles[PERF_TEX] += Ticks() - texTime; 764 #endif 765 766 return c; 767 } 768 clampColor(Vector4f oC[RENDERTARGETS])769 void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS]) 770 { 771 for(int index = 0; index < RENDERTARGETS; index++) 772 { 773 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive())) 774 { 775 continue; 776 } 777 778 switch(state.targetFormat[index]) 779 { 780 case FORMAT_NULL: 781 break; 782 case FORMAT_R5G6B5: 783 case FORMAT_A8R8G8B8: 784 case FORMAT_A8B8G8R8: 785 case FORMAT_X8R8G8B8: 786 case FORMAT_X8B8G8R8: 787 case FORMAT_SRGB8_X8: 788 case FORMAT_SRGB8_A8: 789 case FORMAT_G8R8: 790 case FORMAT_R8: 791 case FORMAT_A8: 792 case FORMAT_G16R16: 793 case FORMAT_A16B16G16R16: 794 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f)); 795 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f)); 796 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f)); 797 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f)); 798 break; 799 case FORMAT_R32F: 800 case FORMAT_G32R32F: 801 case FORMAT_X32B32G32R32F: 802 case FORMAT_A32B32G32R32F: 803 case FORMAT_R32I: 804 case FORMAT_G32R32I: 805 case FORMAT_A32B32G32R32I: 806 case FORMAT_R32UI: 807 case FORMAT_G32R32UI: 808 case FORMAT_A32B32G32R32UI: 809 case FORMAT_R16I: 810 case FORMAT_G16R16I: 811 case FORMAT_A16B16G16R16I: 812 case FORMAT_R16UI: 813 case FORMAT_G16R16UI: 814 case FORMAT_A16B16G16R16UI: 815 case FORMAT_R8I: 816 case FORMAT_G8R8I: 817 case FORMAT_A8B8G8R8I: 818 case FORMAT_R8UI: 819 case FORMAT_G8R8UI: 820 case FORMAT_A8B8G8R8UI: 821 break; 822 case FORMAT_X32B32G32R32F_UNSIGNED: 823 oC[index].x = Max(oC[index].x, Float4(0.0f)); 824 oC[index].y = Max(oC[index].y, Float4(0.0f)); 825 oC[index].z = Max(oC[index].z, Float4(0.0f)); 826 oC[index].w = Max(oC[index].w, Float4(0.0f)); 827 break; 828 default: 829 ASSERT(false); 830 } 831 } 832 } 833 enableMask(const Shader::Instruction * instruction)834 Int4 PixelProgram::enableMask(const Shader::Instruction *instruction) 835 { 836 if(scalar) 837 { 838 return Int4(0xFFFFFFFF); 839 } 840 841 Int4 enable = instruction->analysisBranch ? Int4(enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]) : Int4(0xFFFFFFFF); 842 843 if(shader->containsBreakInstruction() && instruction->analysisBreak) 844 { 845 enable &= enableBreak; 846 } 847 848 if(shader->containsContinueInstruction() && instruction->analysisContinue) 849 { 850 enable &= enableContinue; 851 } 852 853 if(shader->containsLeaveInstruction() && instruction->analysisLeave) 854 { 855 enable &= enableLeave; 856 } 857 858 return enable; 859 } 860 fetchRegister(const Src & src,unsigned int offset)861 Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset) 862 { 863 Vector4f reg; 864 unsigned int i = src.index + offset; 865 866 switch(src.type) 867 { 868 case Shader::PARAMETER_TEMP: 869 if(src.rel.type == Shader::PARAMETER_VOID) 870 { 871 reg = r[i]; 872 } 873 else if(!src.rel.dynamic) 874 { 875 reg = r[i + relativeAddress(src.rel, src.bufferIndex)]; 876 } 877 else 878 { 879 reg = r[i + dynamicAddress(src.rel)]; 880 } 881 break; 882 case Shader::PARAMETER_INPUT: 883 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 884 { 885 reg = v[i]; 886 } 887 else if(!src.rel.dynamic) 888 { 889 reg = v[i + relativeAddress(src.rel, src.bufferIndex)]; 890 } 891 else 892 { 893 reg = v[i + dynamicAddress(src.rel)]; 894 } 895 break; 896 case Shader::PARAMETER_CONST: 897 reg = readConstant(src, offset); 898 break; 899 case Shader::PARAMETER_TEXTURE: 900 reg = v[2 + i]; 901 break; 902 case Shader::PARAMETER_MISCTYPE: 903 if(src.index == Shader::VPosIndex) reg = vPos; 904 if(src.index == Shader::VFaceIndex) reg = vFace; 905 break; 906 case Shader::PARAMETER_SAMPLER: 907 if(src.rel.type == Shader::PARAMETER_VOID) 908 { 909 reg.x = As<Float4>(Int4(i)); 910 } 911 else if(src.rel.type == Shader::PARAMETER_TEMP) 912 { 913 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x)); 914 } 915 return reg; 916 case Shader::PARAMETER_PREDICATE: return reg; // Dummy 917 case Shader::PARAMETER_VOID: return reg; // Dummy 918 case Shader::PARAMETER_FLOAT4LITERAL: 919 reg.x = Float4(src.value[0]); 920 reg.y = Float4(src.value[1]); 921 reg.z = Float4(src.value[2]); 922 reg.w = Float4(src.value[3]); 923 break; 924 case Shader::PARAMETER_CONSTINT: return reg; // Dummy 925 case Shader::PARAMETER_CONSTBOOL: return reg; // Dummy 926 case Shader::PARAMETER_LOOP: return reg; // Dummy 927 case Shader::PARAMETER_COLOROUT: 928 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 929 { 930 reg = oC[i]; 931 } 932 else if(!src.rel.dynamic) 933 { 934 reg = oC[i + relativeAddress(src.rel, src.bufferIndex)]; 935 } 936 else 937 { 938 reg = oC[i + dynamicAddress(src.rel)]; 939 } 940 break; 941 case Shader::PARAMETER_DEPTHOUT: 942 reg.x = oDepth; 943 break; 944 default: 945 ASSERT(false); 946 } 947 948 const Float4 &x = reg[(src.swizzle >> 0) & 0x3]; 949 const Float4 &y = reg[(src.swizzle >> 2) & 0x3]; 950 const Float4 &z = reg[(src.swizzle >> 4) & 0x3]; 951 const Float4 &w = reg[(src.swizzle >> 6) & 0x3]; 952 953 Vector4f mod; 954 955 switch(src.modifier) 956 { 957 case Shader::MODIFIER_NONE: 958 mod.x = x; 959 mod.y = y; 960 mod.z = z; 961 mod.w = w; 962 break; 963 case Shader::MODIFIER_NEGATE: 964 mod.x = -x; 965 mod.y = -y; 966 mod.z = -z; 967 mod.w = -w; 968 break; 969 case Shader::MODIFIER_ABS: 970 mod.x = Abs(x); 971 mod.y = Abs(y); 972 mod.z = Abs(z); 973 mod.w = Abs(w); 974 break; 975 case Shader::MODIFIER_ABS_NEGATE: 976 mod.x = -Abs(x); 977 mod.y = -Abs(y); 978 mod.z = -Abs(z); 979 mod.w = -Abs(w); 980 break; 981 case Shader::MODIFIER_NOT: 982 mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF)); 983 mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF)); 984 mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF)); 985 mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF)); 986 break; 987 default: 988 ASSERT(false); 989 } 990 991 return mod; 992 } 993 uniformAddress(int bufferIndex,unsigned int index)994 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index) 995 { 996 if(bufferIndex == -1) 997 { 998 return data + OFFSET(DrawData, ps.c[index]); 999 } 1000 else 1001 { 1002 return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index; 1003 } 1004 } 1005 uniformAddress(int bufferIndex,unsigned int index,Int & offset)1006 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset) 1007 { 1008 return uniformAddress(bufferIndex, index) + offset * sizeof(float4); 1009 } 1010 readConstant(const Src & src,unsigned int offset)1011 Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset) 1012 { 1013 Vector4f c; 1014 unsigned int i = src.index + offset; 1015 1016 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 1017 { 1018 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i)); 1019 1020 c.x = c.x.xxxx; 1021 c.y = c.y.yyyy; 1022 c.z = c.z.zzzz; 1023 c.w = c.w.wwww; 1024 1025 if(shader->containsDefineInstruction()) // Constant may be known at compile time 1026 { 1027 for(size_t j = 0; j < shader->getLength(); j++) 1028 { 1029 const Shader::Instruction &instruction = *shader->getInstruction(j); 1030 1031 if(instruction.opcode == Shader::OPCODE_DEF) 1032 { 1033 if(instruction.dst.index == i) 1034 { 1035 c.x = Float4(instruction.src[0].value[0]); 1036 c.y = Float4(instruction.src[0].value[1]); 1037 c.z = Float4(instruction.src[0].value[2]); 1038 c.w = Float4(instruction.src[0].value[3]); 1039 1040 break; 1041 } 1042 } 1043 } 1044 } 1045 } 1046 else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP) 1047 { 1048 Int a = relativeAddress(src.rel, src.bufferIndex); 1049 1050 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a)); 1051 1052 c.x = c.x.xxxx; 1053 c.y = c.y.yyyy; 1054 c.z = c.z.zzzz; 1055 c.w = c.w.wwww; 1056 } 1057 else 1058 { 1059 int component = src.rel.swizzle & 0x03; 1060 Float4 a; 1061 1062 switch(src.rel.type) 1063 { 1064 case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break; 1065 case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break; 1066 case Shader::PARAMETER_OUTPUT: a = oC[src.rel.index][component]; break; 1067 case Shader::PARAMETER_CONST: a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break; 1068 case Shader::PARAMETER_MISCTYPE: 1069 switch(src.rel.index) 1070 { 1071 case Shader::VPosIndex: a = vPos.x; break; 1072 case Shader::VFaceIndex: a = vFace.x; break; 1073 default: ASSERT(false); 1074 } 1075 break; 1076 default: ASSERT(false); 1077 } 1078 1079 Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale); 1080 1081 index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS)); // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0} 1082 1083 Int index0 = Extract(index, 0); 1084 Int index1 = Extract(index, 1); 1085 Int index2 = Extract(index, 2); 1086 Int index3 = Extract(index, 3); 1087 1088 c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16); 1089 c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16); 1090 c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16); 1091 c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16); 1092 1093 transpose4x4(c.x, c.y, c.z, c.w); 1094 } 1095 1096 return c; 1097 } 1098 relativeAddress(const Shader::Relative & rel,int bufferIndex)1099 Int PixelProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex) 1100 { 1101 ASSERT(!rel.dynamic); 1102 1103 if(rel.type == Shader::PARAMETER_TEMP) 1104 { 1105 return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale; 1106 } 1107 else if(rel.type == Shader::PARAMETER_INPUT) 1108 { 1109 return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale; 1110 } 1111 else if(rel.type == Shader::PARAMETER_OUTPUT) 1112 { 1113 return As<Int>(Extract(oC[rel.index].x, 0)) * rel.scale; 1114 } 1115 else if(rel.type == Shader::PARAMETER_CONST) 1116 { 1117 return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale; 1118 } 1119 else if(rel.type == Shader::PARAMETER_LOOP) 1120 { 1121 return aL[loopDepth]; 1122 } 1123 else ASSERT(false); 1124 1125 return 0; 1126 } 1127 dynamicAddress(const Shader::Relative & rel)1128 Int4 PixelProgram::dynamicAddress(const Shader::Relative &rel) 1129 { 1130 int component = rel.swizzle & 0x03; 1131 Float4 a; 1132 1133 switch(rel.type) 1134 { 1135 case Shader::PARAMETER_TEMP: a = r[rel.index][component]; break; 1136 case Shader::PARAMETER_INPUT: a = v[rel.index][component]; break; 1137 case Shader::PARAMETER_OUTPUT: a = oC[rel.index][component]; break; 1138 case Shader::PARAMETER_MISCTYPE: 1139 switch(rel.index) 1140 { 1141 case Shader::VPosIndex: a = vPos.x; break; 1142 case Shader::VFaceIndex: a = vFace.x; break; 1143 default: ASSERT(false); 1144 } 1145 break; 1146 default: ASSERT(false); 1147 } 1148 1149 return As<Int4>(a) * Int4(rel.scale); 1150 } 1151 linearToSRGB(const Float4 & x)1152 Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) 1153 { 1154 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x)); 1155 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f); 1156 1157 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f)); 1158 } 1159 M3X2(Vector4f & dst,Vector4f & src0,const Src & src1)1160 void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1) 1161 { 1162 Vector4f row0 = fetchRegister(src1, 0); 1163 Vector4f row1 = fetchRegister(src1, 1); 1164 1165 dst.x = dot3(src0, row0); 1166 dst.y = dot3(src0, row1); 1167 } 1168 M3X3(Vector4f & dst,Vector4f & src0,const Src & src1)1169 void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1) 1170 { 1171 Vector4f row0 = fetchRegister(src1, 0); 1172 Vector4f row1 = fetchRegister(src1, 1); 1173 Vector4f row2 = fetchRegister(src1, 2); 1174 1175 dst.x = dot3(src0, row0); 1176 dst.y = dot3(src0, row1); 1177 dst.z = dot3(src0, row2); 1178 } 1179 M3X4(Vector4f & dst,Vector4f & src0,const Src & src1)1180 void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1) 1181 { 1182 Vector4f row0 = fetchRegister(src1, 0); 1183 Vector4f row1 = fetchRegister(src1, 1); 1184 Vector4f row2 = fetchRegister(src1, 2); 1185 Vector4f row3 = fetchRegister(src1, 3); 1186 1187 dst.x = dot3(src0, row0); 1188 dst.y = dot3(src0, row1); 1189 dst.z = dot3(src0, row2); 1190 dst.w = dot3(src0, row3); 1191 } 1192 M4X3(Vector4f & dst,Vector4f & src0,const Src & src1)1193 void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1) 1194 { 1195 Vector4f row0 = fetchRegister(src1, 0); 1196 Vector4f row1 = fetchRegister(src1, 1); 1197 Vector4f row2 = fetchRegister(src1, 2); 1198 1199 dst.x = dot4(src0, row0); 1200 dst.y = dot4(src0, row1); 1201 dst.z = dot4(src0, row2); 1202 } 1203 M4X4(Vector4f & dst,Vector4f & src0,const Src & src1)1204 void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1) 1205 { 1206 Vector4f row0 = fetchRegister(src1, 0); 1207 Vector4f row1 = fetchRegister(src1, 1); 1208 Vector4f row2 = fetchRegister(src1, 2); 1209 Vector4f row3 = fetchRegister(src1, 3); 1210 1211 dst.x = dot4(src0, row0); 1212 dst.y = dot4(src0, row1); 1213 dst.z = dot4(src0, row2); 1214 dst.w = dot4(src0, row3); 1215 } 1216 TEX(Vector4f & dst,Vector4f & src0,const Src & src1,bool project,bool bias)1217 void PixelProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias) 1218 { 1219 if(project) 1220 { 1221 Vector4f proj; 1222 Float4 rw = reciprocal(src0.w); 1223 proj.x = src0.x * rw; 1224 proj.y = src0.y * rw; 1225 proj.z = src0.z * rw; 1226 1227 dst = sampleTexture(src1, proj, src0.x, (src0), (src0), (src0), Implicit); 1228 } 1229 else 1230 { 1231 dst = sampleTexture(src1, src0, src0.x, (src0), (src0), (src0), bias ? Bias : Implicit); 1232 } 1233 } 1234 TEXOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset)1235 void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset) 1236 { 1237 dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Implicit, Offset}); 1238 } 1239 TEXLODOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & lod)1240 void PixelProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod) 1241 { 1242 dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset}); 1243 } 1244 TEXBIAS(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & bias)1245 void PixelProgram::TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias) 1246 { 1247 dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias); 1248 } 1249 TEXOFFSETBIAS(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & bias)1250 void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias) 1251 { 1252 dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset}); 1253 } 1254 TEXELFETCH(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & lod)1255 void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod) 1256 { 1257 dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch); 1258 } 1259 TEXELFETCHOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & lod)1260 void PixelProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod) 1261 { 1262 dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset}); 1263 } 1264 TEXGRAD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & dsx,Vector4f & dsy)1265 void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy) 1266 { 1267 dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, (src0), Grad); 1268 } 1269 TEXGRADOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & dsx,Vector4f & dsy,Vector4f & offset)1270 void PixelProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset) 1271 { 1272 dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset}); 1273 } 1274 TEXLOD(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & lod)1275 void PixelProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod) 1276 { 1277 dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod); 1278 } 1279 TEXSIZE(Vector4f & dst,Float4 & lod,const Src & src1)1280 void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1) 1281 { 1282 bool uniformSampler = (src1.type == Shader::PARAMETER_SAMPLER && src1.rel.type == Shader::PARAMETER_VOID); 1283 Int offset = uniformSampler ? src1.index * sizeof(Texture) : As<Int>(Float(fetchRegister(src1).x.x)) * sizeof(Texture); 1284 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + offset; 1285 1286 dst = SamplerCore::textureSize(texture, lod); 1287 } 1288 TEXKILL(Int cMask[4],Vector4f & src,unsigned char mask)1289 void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask) 1290 { 1291 Int kill = -1; 1292 1293 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f))); 1294 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f))); 1295 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f))); 1296 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f))); 1297 1298 // FIXME: Dynamic branching affects TEXKILL? 1299 // if(shader->containsDynamicBranching()) 1300 // { 1301 // kill = ~SignMask(enableMask()); 1302 // } 1303 1304 for(unsigned int q = 0; q < state.multiSample; q++) 1305 { 1306 cMask[q] &= kill; 1307 } 1308 1309 // FIXME: Branch to end of shader if all killed? 1310 } 1311 DISCARD(Int cMask[4],const Shader::Instruction * instruction)1312 void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction) 1313 { 1314 Int kill = 0; 1315 1316 if(shader->containsDynamicBranching()) 1317 { 1318 kill = ~SignMask(enableMask(instruction)); 1319 } 1320 1321 for(unsigned int q = 0; q < state.multiSample; q++) 1322 { 1323 cMask[q] &= kill; 1324 } 1325 1326 // FIXME: Branch to end of shader if all killed? 1327 } 1328 DFDX(Vector4f & dst,Vector4f & src)1329 void PixelProgram::DFDX(Vector4f &dst, Vector4f &src) 1330 { 1331 dst.x = src.x.yyww - src.x.xxzz; 1332 dst.y = src.y.yyww - src.y.xxzz; 1333 dst.z = src.z.yyww - src.z.xxzz; 1334 dst.w = src.w.yyww - src.w.xxzz; 1335 } 1336 DFDY(Vector4f & dst,Vector4f & src)1337 void PixelProgram::DFDY(Vector4f &dst, Vector4f &src) 1338 { 1339 dst.x = src.x.zwzw - src.x.xyxy; 1340 dst.y = src.y.zwzw - src.y.xyxy; 1341 dst.z = src.z.zwzw - src.z.xyxy; 1342 dst.w = src.w.zwzw - src.w.xyxy; 1343 } 1344 FWIDTH(Vector4f & dst,Vector4f & src)1345 void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src) 1346 { 1347 // abs(dFdx(src)) + abs(dFdy(src)); 1348 dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy); 1349 dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy); 1350 dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy); 1351 dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy); 1352 } 1353 BREAK()1354 void PixelProgram::BREAK() 1355 { 1356 enableBreak = enableBreak & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1357 } 1358 BREAKC(Vector4f & src0,Vector4f & src1,Control control)1359 void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control) 1360 { 1361 Int4 condition; 1362 1363 switch(control) 1364 { 1365 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 1366 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 1367 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 1368 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 1369 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 1370 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 1371 default: 1372 ASSERT(false); 1373 } 1374 1375 BREAK(condition); 1376 } 1377 BREAKP(const Src & predicateRegister)1378 void PixelProgram::BREAKP(const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC 1379 { 1380 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1381 1382 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1383 { 1384 condition = ~condition; 1385 } 1386 1387 BREAK(condition); 1388 } 1389 BREAK(Int4 & condition)1390 void PixelProgram::BREAK(Int4 &condition) 1391 { 1392 condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1393 1394 enableBreak = enableBreak & ~condition; 1395 } 1396 CONTINUE()1397 void PixelProgram::CONTINUE() 1398 { 1399 enableContinue = enableContinue & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1400 } 1401 TEST()1402 void PixelProgram::TEST() 1403 { 1404 enableContinue = restoreContinue.back(); 1405 restoreContinue.pop_back(); 1406 } 1407 SCALAR()1408 void PixelProgram::SCALAR() 1409 { 1410 scalar = true; 1411 } 1412 CALL(int labelIndex,int callSiteIndex)1413 void PixelProgram::CALL(int labelIndex, int callSiteIndex) 1414 { 1415 if(!labelBlock[labelIndex]) 1416 { 1417 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1418 } 1419 1420 if(callRetBlock[labelIndex].size() > 1) 1421 { 1422 callStack[Min(stackIndex++, Int(MAX_SHADER_CALL_STACK_SIZE))] = UInt(callSiteIndex); 1423 } 1424 1425 Int4 restoreLeave = enableLeave; 1426 1427 Nucleus::createBr(labelBlock[labelIndex]); 1428 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1429 1430 enableLeave = restoreLeave; 1431 } 1432 CALLNZ(int labelIndex,int callSiteIndex,const Src & src)1433 void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src) 1434 { 1435 if(src.type == Shader::PARAMETER_CONSTBOOL) 1436 { 1437 CALLNZb(labelIndex, callSiteIndex, src); 1438 } 1439 else if(src.type == Shader::PARAMETER_PREDICATE) 1440 { 1441 CALLNZp(labelIndex, callSiteIndex, src); 1442 } 1443 else ASSERT(false); 1444 } 1445 CALLNZb(int labelIndex,int callSiteIndex,const Src & boolRegister)1446 void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister) 1447 { 1448 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME 1449 1450 if(boolRegister.modifier == Shader::MODIFIER_NOT) 1451 { 1452 condition = !condition; 1453 } 1454 1455 if(!labelBlock[labelIndex]) 1456 { 1457 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1458 } 1459 1460 if(callRetBlock[labelIndex].size() > 1) 1461 { 1462 callStack[Min(stackIndex++, Int(MAX_SHADER_CALL_STACK_SIZE))] = UInt(callSiteIndex); 1463 } 1464 1465 Int4 restoreLeave = enableLeave; 1466 1467 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); 1468 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1469 1470 enableLeave = restoreLeave; 1471 } 1472 CALLNZp(int labelIndex,int callSiteIndex,const Src & predicateRegister)1473 void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister) 1474 { 1475 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1476 1477 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1478 { 1479 condition = ~condition; 1480 } 1481 1482 condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1483 1484 if(!labelBlock[labelIndex]) 1485 { 1486 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1487 } 1488 1489 if(callRetBlock[labelIndex].size() > 1) 1490 { 1491 callStack[Min(stackIndex++, Int(MAX_SHADER_CALL_STACK_SIZE))] = UInt(callSiteIndex); 1492 } 1493 1494 enableIndex++; 1495 enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; 1496 Int4 restoreLeave = enableLeave; 1497 1498 Bool notAllFalse = SignMask(condition) != 0; 1499 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); 1500 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1501 1502 enableIndex--; 1503 enableLeave = restoreLeave; 1504 } 1505 ELSE()1506 void PixelProgram::ELSE() 1507 { 1508 ifDepth--; 1509 1510 BasicBlock *falseBlock = ifFalseBlock[ifDepth]; 1511 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1512 1513 if(isConditionalIf[ifDepth]) 1514 { 1515 Int4 condition = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1516 Bool notAllFalse = SignMask(condition) != 0; 1517 1518 branch(notAllFalse, falseBlock, endBlock); 1519 1520 enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1521 } 1522 else 1523 { 1524 Nucleus::createBr(endBlock); 1525 Nucleus::setInsertBlock(falseBlock); 1526 } 1527 1528 ifFalseBlock[ifDepth] = endBlock; 1529 1530 ifDepth++; 1531 } 1532 ENDIF()1533 void PixelProgram::ENDIF() 1534 { 1535 ifDepth--; 1536 1537 BasicBlock *endBlock = ifFalseBlock[ifDepth]; 1538 1539 Nucleus::createBr(endBlock); 1540 Nucleus::setInsertBlock(endBlock); 1541 1542 if(isConditionalIf[ifDepth]) 1543 { 1544 enableIndex--; 1545 } 1546 } 1547 ENDLOOP()1548 void PixelProgram::ENDLOOP() 1549 { 1550 loopRepDepth--; 1551 1552 aL[loopDepth] = aL[loopDepth] + increment[loopDepth]; // FIXME: += 1553 1554 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1555 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1556 1557 Nucleus::createBr(testBlock); 1558 Nucleus::setInsertBlock(endBlock); 1559 1560 loopDepth--; 1561 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1562 } 1563 ENDREP()1564 void PixelProgram::ENDREP() 1565 { 1566 loopRepDepth--; 1567 1568 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1569 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1570 1571 Nucleus::createBr(testBlock); 1572 Nucleus::setInsertBlock(endBlock); 1573 1574 loopDepth--; 1575 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1576 } 1577 ENDWHILE()1578 void PixelProgram::ENDWHILE() 1579 { 1580 loopRepDepth--; 1581 1582 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1583 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1584 1585 Nucleus::createBr(testBlock); 1586 Nucleus::setInsertBlock(endBlock); 1587 1588 enableIndex--; 1589 scalar = false; 1590 } 1591 ENDSWITCH()1592 void PixelProgram::ENDSWITCH() 1593 { 1594 loopRepDepth--; 1595 1596 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1597 1598 Nucleus::createBr(endBlock); 1599 Nucleus::setInsertBlock(endBlock); 1600 } 1601 IF(const Src & src)1602 void PixelProgram::IF(const Src &src) 1603 { 1604 if(src.type == Shader::PARAMETER_CONSTBOOL) 1605 { 1606 IFb(src); 1607 } 1608 else if(src.type == Shader::PARAMETER_PREDICATE) 1609 { 1610 IFp(src); 1611 } 1612 else 1613 { 1614 Int4 condition = As<Int4>(fetchRegister(src).x); 1615 IF(condition); 1616 } 1617 } 1618 IFb(const Src & boolRegister)1619 void PixelProgram::IFb(const Src &boolRegister) 1620 { 1621 ASSERT(ifDepth < 24 + 4); 1622 1623 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME 1624 1625 if(boolRegister.modifier == Shader::MODIFIER_NOT) 1626 { 1627 condition = !condition; 1628 } 1629 1630 BasicBlock *trueBlock = Nucleus::createBasicBlock(); 1631 BasicBlock *falseBlock = Nucleus::createBasicBlock(); 1632 1633 branch(condition, trueBlock, falseBlock); 1634 1635 isConditionalIf[ifDepth] = false; 1636 ifFalseBlock[ifDepth] = falseBlock; 1637 1638 ifDepth++; 1639 } 1640 IFp(const Src & predicateRegister)1641 void PixelProgram::IFp(const Src &predicateRegister) 1642 { 1643 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1644 1645 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1646 { 1647 condition = ~condition; 1648 } 1649 1650 IF(condition); 1651 } 1652 IFC(Vector4f & src0,Vector4f & src1,Control control)1653 void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control) 1654 { 1655 Int4 condition; 1656 1657 switch(control) 1658 { 1659 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 1660 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 1661 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 1662 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 1663 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 1664 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 1665 default: 1666 ASSERT(false); 1667 } 1668 1669 IF(condition); 1670 } 1671 IF(Int4 & condition)1672 void PixelProgram::IF(Int4 &condition) 1673 { 1674 condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1675 1676 enableIndex++; 1677 enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; 1678 1679 BasicBlock *trueBlock = Nucleus::createBasicBlock(); 1680 BasicBlock *falseBlock = Nucleus::createBasicBlock(); 1681 1682 Bool notAllFalse = SignMask(condition) != 0; 1683 1684 branch(notAllFalse, trueBlock, falseBlock); 1685 1686 isConditionalIf[ifDepth] = true; 1687 ifFalseBlock[ifDepth] = falseBlock; 1688 1689 ifDepth++; 1690 } 1691 LABEL(int labelIndex)1692 void PixelProgram::LABEL(int labelIndex) 1693 { 1694 if(!labelBlock[labelIndex]) 1695 { 1696 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1697 } 1698 1699 Nucleus::setInsertBlock(labelBlock[labelIndex]); 1700 currentLabel = labelIndex; 1701 } 1702 LOOP(const Src & integerRegister)1703 void PixelProgram::LOOP(const Src &integerRegister) 1704 { 1705 loopDepth++; 1706 1707 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); 1708 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1])); 1709 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2])); 1710 1711 // If(increment[loopDepth] == 0) 1712 // { 1713 // increment[loopDepth] = 1; 1714 // } 1715 1716 BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1717 BasicBlock *testBlock = Nucleus::createBasicBlock(); 1718 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1719 1720 loopRepTestBlock[loopRepDepth] = testBlock; 1721 loopRepEndBlock[loopRepDepth] = endBlock; 1722 1723 // FIXME: jump(testBlock) 1724 Nucleus::createBr(testBlock); 1725 Nucleus::setInsertBlock(testBlock); 1726 1727 branch(iteration[loopDepth] > 0, loopBlock, endBlock); 1728 Nucleus::setInsertBlock(loopBlock); 1729 1730 iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- 1731 1732 loopRepDepth++; 1733 } 1734 REP(const Src & integerRegister)1735 void PixelProgram::REP(const Src &integerRegister) 1736 { 1737 loopDepth++; 1738 1739 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); 1740 aL[loopDepth] = aL[loopDepth - 1]; 1741 1742 BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1743 BasicBlock *testBlock = Nucleus::createBasicBlock(); 1744 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1745 1746 loopRepTestBlock[loopRepDepth] = testBlock; 1747 loopRepEndBlock[loopRepDepth] = endBlock; 1748 1749 // FIXME: jump(testBlock) 1750 Nucleus::createBr(testBlock); 1751 Nucleus::setInsertBlock(testBlock); 1752 1753 branch(iteration[loopDepth] > 0, loopBlock, endBlock); 1754 Nucleus::setInsertBlock(loopBlock); 1755 1756 iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- 1757 1758 loopRepDepth++; 1759 } 1760 WHILE(const Src & temporaryRegister)1761 void PixelProgram::WHILE(const Src &temporaryRegister) 1762 { 1763 enableIndex++; 1764 1765 BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1766 BasicBlock *testBlock = Nucleus::createBasicBlock(); 1767 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1768 1769 loopRepTestBlock[loopRepDepth] = testBlock; 1770 loopRepEndBlock[loopRepDepth] = endBlock; 1771 1772 Int4 restoreBreak = enableBreak; 1773 restoreContinue.push_back(enableContinue); 1774 1775 // TODO: jump(testBlock) 1776 Nucleus::createBr(testBlock); 1777 Nucleus::setInsertBlock(testBlock); 1778 1779 const Vector4f &src = fetchRegister(temporaryRegister); 1780 Int4 condition = As<Int4>(src.x); 1781 condition &= enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1782 if(shader->containsLeaveInstruction()) condition &= enableLeave; 1783 if(shader->containsBreakInstruction()) condition &= enableBreak; 1784 enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; 1785 1786 Bool notAllFalse = SignMask(condition) != 0; 1787 branch(notAllFalse, loopBlock, endBlock); 1788 1789 Nucleus::setInsertBlock(endBlock); 1790 enableBreak = restoreBreak; 1791 1792 Nucleus::setInsertBlock(loopBlock); 1793 1794 loopRepDepth++; 1795 scalar = false; 1796 } 1797 SWITCH()1798 void PixelProgram::SWITCH() 1799 { 1800 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1801 1802 loopRepTestBlock[loopRepDepth] = nullptr; 1803 loopRepEndBlock[loopRepDepth] = endBlock; 1804 1805 Int4 restoreBreak = enableBreak; 1806 1807 BasicBlock *currentBlock = Nucleus::getInsertBlock(); 1808 1809 Nucleus::setInsertBlock(endBlock); 1810 enableBreak = restoreBreak; 1811 1812 Nucleus::setInsertBlock(currentBlock); 1813 1814 loopRepDepth++; 1815 } 1816 RET()1817 void PixelProgram::RET() 1818 { 1819 if(currentLabel == -1) 1820 { 1821 returnBlock = Nucleus::createBasicBlock(); 1822 Nucleus::createBr(returnBlock); 1823 } 1824 else 1825 { 1826 BasicBlock *unreachableBlock = Nucleus::createBasicBlock(); 1827 1828 if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack 1829 { 1830 // FIXME: Encapsulate 1831 UInt index = callStack[--stackIndex]; 1832 1833 Value *value = index.loadValue(); 1834 SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size()); 1835 1836 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++) 1837 { 1838 Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]); 1839 } 1840 } 1841 else if(callRetBlock[currentLabel].size() == 1) // Jump directly to the unique return destination 1842 { 1843 Nucleus::createBr(callRetBlock[currentLabel][0]); 1844 } 1845 else // Function isn't called 1846 { 1847 Nucleus::createBr(unreachableBlock); 1848 } 1849 1850 Nucleus::setInsertBlock(unreachableBlock); 1851 Nucleus::createUnreachable(); 1852 } 1853 } 1854 LEAVE()1855 void PixelProgram::LEAVE() 1856 { 1857 enableLeave = enableLeave & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1858 1859 // FIXME: Return from function if all instances left 1860 // FIXME: Use enableLeave in other control-flow constructs 1861 } 1862 } 1863