1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelProgram.hpp" 16 #include "Primitive.hpp" 17 #include "Renderer.hpp" 18 #include "SamplerCore.hpp" 19 20 namespace sw 21 { 22 extern bool postBlendSRGB; 23 extern bool booleanFaceRegister; 24 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 25 extern bool fullPixelPositionRegister; 26 setBuiltins(Int & x,Int & y,Float4 (& z)[4],Float4 & w)27 void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) 28 { 29 if(shader->getVersion() >= 0x0300) 30 { 31 if(shader->vPosDeclared) 32 { 33 if(!halfIntegerCoordinates) 34 { 35 vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1); 36 vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1); 37 } 38 else 39 { 40 vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f); 41 vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f); 42 } 43 44 if(fullPixelPositionRegister) 45 { 46 vPos.z = z[0]; // FIXME: Centroid? 47 vPos.w = w; // FIXME: Centroid? 48 } 49 } 50 51 if(shader->vFaceDeclared) 52 { 53 Float4 area = *Pointer<Float>(primitive + OFFSET(Primitive, area)); 54 Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area; 55 56 vFace.x = face; 57 vFace.y = face; 58 vFace.z = face; 59 vFace.w = face; 60 } 61 } 62 } 63 applyShader(Int cMask[4])64 void PixelProgram::applyShader(Int cMask[4]) 65 { 66 enableIndex = 0; 67 stackIndex = 0; 68 69 if(shader->containsLeaveInstruction()) 70 { 71 enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 72 } 73 74 for(int i = 0; i < RENDERTARGETS; i++) 75 { 76 if(state.targetFormat[i] != FORMAT_NULL) 77 { 78 oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f); 79 } 80 } 81 82 // Create all call site return blocks up front 83 for(size_t i = 0; i < shader->getLength(); i++) 84 { 85 const Shader::Instruction *instruction = shader->getInstruction(i); 86 Shader::Opcode opcode = instruction->opcode; 87 88 if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ) 89 { 90 const Dst &dst = instruction->dst; 91 92 ASSERT(callRetBlock[dst.label].size() == dst.callSite); 93 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock()); 94 } 95 } 96 97 bool broadcastColor0 = true; 98 99 for(size_t i = 0; i < shader->getLength(); i++) 100 { 101 const Shader::Instruction *instruction = shader->getInstruction(i); 102 Shader::Opcode opcode = instruction->opcode; 103 104 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) 105 { 106 continue; 107 } 108 109 const Dst &dst = instruction->dst; 110 const Src &src0 = instruction->src[0]; 111 const Src &src1 = instruction->src[1]; 112 const Src &src2 = instruction->src[2]; 113 const Src &src3 = instruction->src[3]; 114 const Src &src4 = instruction->src[4]; 115 116 bool predicate = instruction->predicate; 117 Control control = instruction->control; 118 bool pp = dst.partialPrecision; 119 bool project = instruction->project; 120 bool bias = instruction->bias; 121 122 Vector4f d; 123 Vector4f s0; 124 Vector4f s1; 125 Vector4f s2; 126 Vector4f s3; 127 Vector4f s4; 128 129 if(opcode == Shader::OPCODE_TEXKILL) // Takes destination as input 130 { 131 if(dst.type == Shader::PARAMETER_TEXTURE) 132 { 133 d.x = v[2 + dst.index].x; 134 d.y = v[2 + dst.index].y; 135 d.z = v[2 + dst.index].z; 136 d.w = v[2 + dst.index].w; 137 } 138 else 139 { 140 d = r[dst.index]; 141 } 142 } 143 144 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0); 145 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1); 146 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2); 147 if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3); 148 if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4); 149 150 switch(opcode) 151 { 152 case Shader::OPCODE_PS_2_0: break; 153 case Shader::OPCODE_PS_2_x: break; 154 case Shader::OPCODE_PS_3_0: break; 155 case Shader::OPCODE_DEF: break; 156 case Shader::OPCODE_DCL: break; 157 case Shader::OPCODE_NOP: break; 158 case Shader::OPCODE_MOV: mov(d, s0); break; 159 case Shader::OPCODE_NEG: neg(d, s0); break; 160 case Shader::OPCODE_INEG: ineg(d, s0); break; 161 case Shader::OPCODE_F2B: f2b(d, s0); break; 162 case Shader::OPCODE_B2F: b2f(d, s0); break; 163 case Shader::OPCODE_F2I: f2i(d, s0); break; 164 case Shader::OPCODE_I2F: i2f(d, s0); break; 165 case Shader::OPCODE_F2U: f2u(d, s0); break; 166 case Shader::OPCODE_U2F: u2f(d, s0); break; 167 case Shader::OPCODE_I2B: i2b(d, s0); break; 168 case Shader::OPCODE_B2I: b2i(d, s0); break; 169 case Shader::OPCODE_ADD: add(d, s0, s1); break; 170 case Shader::OPCODE_IADD: iadd(d, s0, s1); break; 171 case Shader::OPCODE_SUB: sub(d, s0, s1); break; 172 case Shader::OPCODE_ISUB: isub(d, s0, s1); break; 173 case Shader::OPCODE_MUL: mul(d, s0, s1); break; 174 case Shader::OPCODE_IMUL: imul(d, s0, s1); break; 175 case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break; 176 case Shader::OPCODE_IMAD: imad(d, s0, s1, s2); break; 177 case Shader::OPCODE_DP1: dp1(d, s0, s1); break; 178 case Shader::OPCODE_DP2: dp2(d, s0, s1); break; 179 case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break; 180 case Shader::OPCODE_DP3: dp3(d, s0, s1); break; 181 case Shader::OPCODE_DP4: dp4(d, s0, s1); break; 182 case Shader::OPCODE_DET2: det2(d, s0, s1); break; 183 case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break; 184 case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break; 185 case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break; 186 case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; 187 case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break; 188 case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break; 189 case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break; 190 case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break; 191 case Shader::OPCODE_FRC: frc(d, s0); break; 192 case Shader::OPCODE_TRUNC: trunc(d, s0); break; 193 case Shader::OPCODE_FLOOR: floor(d, s0); break; 194 case Shader::OPCODE_ROUND: round(d, s0); break; 195 case Shader::OPCODE_ROUNDEVEN: roundEven(d, s0); break; 196 case Shader::OPCODE_CEIL: ceil(d, s0); break; 197 case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; 198 case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; 199 case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break; 200 case Shader::OPCODE_LOG2: log2(d, s0, pp); break; 201 case Shader::OPCODE_EXP: exp(d, s0, pp); break; 202 case Shader::OPCODE_LOG: log(d, s0, pp); break; 203 case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break; 204 case Shader::OPCODE_DIV: div(d, s0, s1); break; 205 case Shader::OPCODE_IDIV: idiv(d, s0, s1); break; 206 case Shader::OPCODE_UDIV: udiv(d, s0, s1); break; 207 case Shader::OPCODE_MOD: mod(d, s0, s1); break; 208 case Shader::OPCODE_IMOD: imod(d, s0, s1); break; 209 case Shader::OPCODE_UMOD: umod(d, s0, s1); break; 210 case Shader::OPCODE_SHL: shl(d, s0, s1); break; 211 case Shader::OPCODE_ISHR: ishr(d, s0, s1); break; 212 case Shader::OPCODE_USHR: ushr(d, s0, s1); break; 213 case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break; 214 case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break; 215 case Shader::OPCODE_RSQ: rsq(d, s0, pp); break; 216 case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break; 217 case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break; 218 case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break; 219 case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); break; 220 case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break; 221 case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break; 222 case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break; 223 case Shader::OPCODE_MIN: min(d, s0, s1); break; 224 case Shader::OPCODE_IMIN: imin(d, s0, s1); break; 225 case Shader::OPCODE_UMIN: umin(d, s0, s1); break; 226 case Shader::OPCODE_MAX: max(d, s0, s1); break; 227 case Shader::OPCODE_IMAX: imax(d, s0, s1); break; 228 case Shader::OPCODE_UMAX: umax(d, s0, s1); break; 229 case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break; 230 case Shader::OPCODE_STEP: step(d, s0, s1); break; 231 case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break; 232 case Shader::OPCODE_FLOATBITSTOINT: 233 case Shader::OPCODE_FLOATBITSTOUINT: 234 case Shader::OPCODE_INTBITSTOFLOAT: 235 case Shader::OPCODE_UINTBITSTOFLOAT: d = s0; break; 236 case Shader::OPCODE_PACKSNORM2x16: packSnorm2x16(d, s0); break; 237 case Shader::OPCODE_PACKUNORM2x16: packUnorm2x16(d, s0); break; 238 case Shader::OPCODE_PACKHALF2x16: packHalf2x16(d, s0); break; 239 case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0); break; 240 case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0); break; 241 case Shader::OPCODE_UNPACKHALF2x16: unpackHalf2x16(d, s0); break; 242 case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break; 243 case Shader::OPCODE_POW: pow(d, s0, s1, pp); break; 244 case Shader::OPCODE_SGN: sgn(d, s0); break; 245 case Shader::OPCODE_ISGN: isgn(d, s0); break; 246 case Shader::OPCODE_CRS: crs(d, s0, s1); break; 247 case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break; 248 case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break; 249 case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break; 250 case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, s2); break; 251 case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break; 252 case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break; 253 case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break; 254 case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break; 255 case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break; 256 case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break; 257 case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break; 258 case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break; 259 case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break; 260 case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break; 261 case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break; 262 case Shader::OPCODE_ABS: abs(d, s0); break; 263 case Shader::OPCODE_IABS: iabs(d, s0); break; 264 case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break; 265 case Shader::OPCODE_COS: cos(d, s0, pp); break; 266 case Shader::OPCODE_SIN: sin(d, s0, pp); break; 267 case Shader::OPCODE_TAN: tan(d, s0, pp); break; 268 case Shader::OPCODE_ACOS: acos(d, s0, pp); break; 269 case Shader::OPCODE_ASIN: asin(d, s0, pp); break; 270 case Shader::OPCODE_ATAN: atan(d, s0, pp); break; 271 case Shader::OPCODE_ATAN2: atan2(d, s0, s1, pp); break; 272 case Shader::OPCODE_COSH: cosh(d, s0, pp); break; 273 case Shader::OPCODE_SINH: sinh(d, s0, pp); break; 274 case Shader::OPCODE_TANH: tanh(d, s0, pp); break; 275 case Shader::OPCODE_ACOSH: acosh(d, s0, pp); break; 276 case Shader::OPCODE_ASINH: asinh(d, s0, pp); break; 277 case Shader::OPCODE_ATANH: atanh(d, s0, pp); break; 278 case Shader::OPCODE_M4X4: M4X4(d, s0, src1); break; 279 case Shader::OPCODE_M4X3: M4X3(d, s0, src1); break; 280 case Shader::OPCODE_M3X4: M3X4(d, s0, src1); break; 281 case Shader::OPCODE_M3X3: M3X3(d, s0, src1); break; 282 case Shader::OPCODE_M3X2: M3X2(d, s0, src1); break; 283 case Shader::OPCODE_TEX: TEXLD(d, s0, src1, project, bias); break; 284 case Shader::OPCODE_TEXLDD: TEXLDD(d, s0, src1, s2, s3, project); break; 285 case Shader::OPCODE_TEXLDL: TEXLDL(d, s0, src1, project); break; 286 case Shader::OPCODE_TEXSIZE: TEXSIZE(d, s0.x, src1); break; 287 case Shader::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break; 288 case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2, s3, project, bias); break; 289 case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2, project, bias); break; 290 case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2); break; 291 case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2, s3); break; 292 case Shader::OPCODE_TEXGRAD: TEXGRAD(d, s0, src1, s2, s3); break; 293 case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4); break; 294 case Shader::OPCODE_DISCARD: DISCARD(cMask, instruction); break; 295 case Shader::OPCODE_DFDX: DFDX(d, s0); break; 296 case Shader::OPCODE_DFDY: DFDY(d, s0); break; 297 case Shader::OPCODE_FWIDTH: FWIDTH(d, s0); break; 298 case Shader::OPCODE_BREAK: BREAK(); break; 299 case Shader::OPCODE_BREAKC: BREAKC(s0, s1, control); break; 300 case Shader::OPCODE_BREAKP: BREAKP(src0); break; 301 case Shader::OPCODE_CONTINUE: CONTINUE(); break; 302 case Shader::OPCODE_TEST: TEST(); break; 303 case Shader::OPCODE_CALL: CALL(dst.label, dst.callSite); break; 304 case Shader::OPCODE_CALLNZ: CALLNZ(dst.label, dst.callSite, src0); break; 305 case Shader::OPCODE_ELSE: ELSE(); break; 306 case Shader::OPCODE_ENDIF: ENDIF(); break; 307 case Shader::OPCODE_ENDLOOP: ENDLOOP(); break; 308 case Shader::OPCODE_ENDREP: ENDREP(); break; 309 case Shader::OPCODE_ENDWHILE: ENDWHILE(); break; 310 case Shader::OPCODE_ENDSWITCH: ENDSWITCH(); break; 311 case Shader::OPCODE_IF: IF(src0); break; 312 case Shader::OPCODE_IFC: IFC(s0, s1, control); break; 313 case Shader::OPCODE_LABEL: LABEL(dst.index); break; 314 case Shader::OPCODE_LOOP: LOOP(src1); break; 315 case Shader::OPCODE_REP: REP(src0); break; 316 case Shader::OPCODE_WHILE: WHILE(src0); break; 317 case Shader::OPCODE_SWITCH: SWITCH(); break; 318 case Shader::OPCODE_RET: RET(); break; 319 case Shader::OPCODE_LEAVE: LEAVE(); break; 320 case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break; 321 case Shader::OPCODE_ALL: all(d.x, s0); break; 322 case Shader::OPCODE_ANY: any(d.x, s0); break; 323 case Shader::OPCODE_NOT: not(d, s0); break; 324 case Shader::OPCODE_OR: or(d, s0, s1); break; 325 case Shader::OPCODE_XOR: xor(d, s0, s1); break; 326 case Shader::OPCODE_AND: and(d, s0, s1); break; 327 case Shader::OPCODE_EQ: equal(d, s0, s1); break; 328 case Shader::OPCODE_NE: notEqual(d, s0, s1); break; 329 case Shader::OPCODE_END: break; 330 default: 331 ASSERT(false); 332 } 333 334 if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP) 335 { 336 if(dst.integer) 337 { 338 switch(opcode) 339 { 340 case Shader::OPCODE_DIV: 341 if(dst.x) d.x = Trunc(d.x); 342 if(dst.y) d.y = Trunc(d.y); 343 if(dst.z) d.z = Trunc(d.z); 344 if(dst.w) d.w = Trunc(d.w); 345 break; 346 default: 347 break; // No truncation to integer required when arguments are integer 348 } 349 } 350 351 if(dst.saturate) 352 { 353 if(dst.x) d.x = Max(d.x, Float4(0.0f)); 354 if(dst.y) d.y = Max(d.y, Float4(0.0f)); 355 if(dst.z) d.z = Max(d.z, Float4(0.0f)); 356 if(dst.w) d.w = Max(d.w, Float4(0.0f)); 357 358 if(dst.x) d.x = Min(d.x, Float4(1.0f)); 359 if(dst.y) d.y = Min(d.y, Float4(1.0f)); 360 if(dst.z) d.z = Min(d.z, Float4(1.0f)); 361 if(dst.w) d.w = Min(d.w, Float4(1.0f)); 362 } 363 364 if(instruction->isPredicated()) 365 { 366 Vector4f pDst; // FIXME: Rename 367 368 switch(dst.type) 369 { 370 case Shader::PARAMETER_TEMP: 371 if(dst.rel.type == Shader::PARAMETER_VOID) 372 { 373 if(dst.x) pDst.x = r[dst.index].x; 374 if(dst.y) pDst.y = r[dst.index].y; 375 if(dst.z) pDst.z = r[dst.index].z; 376 if(dst.w) pDst.w = r[dst.index].w; 377 } 378 else 379 { 380 Int a = relativeAddress(dst); 381 382 if(dst.x) pDst.x = r[dst.index + a].x; 383 if(dst.y) pDst.y = r[dst.index + a].y; 384 if(dst.z) pDst.z = r[dst.index + a].z; 385 if(dst.w) pDst.w = r[dst.index + a].w; 386 } 387 break; 388 case Shader::PARAMETER_COLOROUT: 389 if(dst.rel.type == Shader::PARAMETER_VOID) 390 { 391 if(dst.x) pDst.x = oC[dst.index].x; 392 if(dst.y) pDst.y = oC[dst.index].y; 393 if(dst.z) pDst.z = oC[dst.index].z; 394 if(dst.w) pDst.w = oC[dst.index].w; 395 } 396 else 397 { 398 Int a = relativeAddress(dst) + dst.index; 399 400 if(dst.x) pDst.x = oC[a].x; 401 if(dst.y) pDst.y = oC[a].y; 402 if(dst.z) pDst.z = oC[a].z; 403 if(dst.w) pDst.w = oC[a].w; 404 } 405 break; 406 case Shader::PARAMETER_PREDICATE: 407 if(dst.x) pDst.x = p0.x; 408 if(dst.y) pDst.y = p0.y; 409 if(dst.z) pDst.z = p0.z; 410 if(dst.w) pDst.w = p0.w; 411 break; 412 case Shader::PARAMETER_DEPTHOUT: 413 pDst.x = oDepth; 414 break; 415 default: 416 ASSERT(false); 417 } 418 419 Int4 enable = enableMask(instruction); 420 421 Int4 xEnable = enable; 422 Int4 yEnable = enable; 423 Int4 zEnable = enable; 424 Int4 wEnable = enable; 425 426 if(predicate) 427 { 428 unsigned char pSwizzle = instruction->predicateSwizzle; 429 430 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03]; 431 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03]; 432 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03]; 433 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03]; 434 435 if(!instruction->predicateNot) 436 { 437 if(dst.x) xEnable = xEnable & As<Int4>(xPredicate); 438 if(dst.y) yEnable = yEnable & As<Int4>(yPredicate); 439 if(dst.z) zEnable = zEnable & As<Int4>(zPredicate); 440 if(dst.w) wEnable = wEnable & As<Int4>(wPredicate); 441 } 442 else 443 { 444 if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate); 445 if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate); 446 if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate); 447 if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate); 448 } 449 } 450 451 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); 452 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); 453 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable); 454 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); 455 456 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); 457 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); 458 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); 459 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); 460 } 461 462 switch(dst.type) 463 { 464 case Shader::PARAMETER_TEMP: 465 if(dst.rel.type == Shader::PARAMETER_VOID) 466 { 467 if(dst.x) r[dst.index].x = d.x; 468 if(dst.y) r[dst.index].y = d.y; 469 if(dst.z) r[dst.index].z = d.z; 470 if(dst.w) r[dst.index].w = d.w; 471 } 472 else 473 { 474 Int a = relativeAddress(dst); 475 476 if(dst.x) r[dst.index + a].x = d.x; 477 if(dst.y) r[dst.index + a].y = d.y; 478 if(dst.z) r[dst.index + a].z = d.z; 479 if(dst.w) r[dst.index + a].w = d.w; 480 } 481 break; 482 case Shader::PARAMETER_COLOROUT: 483 if(dst.rel.type == Shader::PARAMETER_VOID) 484 { 485 broadcastColor0 = (dst.index == 0) && broadcastColor0; 486 487 if(dst.x) { oC[dst.index].x = d.x; } 488 if(dst.y) { oC[dst.index].y = d.y; } 489 if(dst.z) { oC[dst.index].z = d.z; } 490 if(dst.w) { oC[dst.index].w = d.w; } 491 } 492 else 493 { 494 broadcastColor0 = false; 495 Int a = relativeAddress(dst) + dst.index; 496 497 if(dst.x) { oC[a].x = d.x; } 498 if(dst.y) { oC[a].y = d.y; } 499 if(dst.z) { oC[a].z = d.z; } 500 if(dst.w) { oC[a].w = d.w; } 501 } 502 break; 503 case Shader::PARAMETER_PREDICATE: 504 if(dst.x) p0.x = d.x; 505 if(dst.y) p0.y = d.y; 506 if(dst.z) p0.z = d.z; 507 if(dst.w) p0.w = d.w; 508 break; 509 case Shader::PARAMETER_DEPTHOUT: 510 oDepth = d.x; 511 break; 512 default: 513 ASSERT(false); 514 } 515 } 516 } 517 518 if(currentLabel != -1) 519 { 520 Nucleus::setInsertBlock(returnBlock); 521 } 522 523 if(broadcastColor0) 524 { 525 for(int i = 0; i < RENDERTARGETS; i++) 526 { 527 c[i] = oC[0]; 528 } 529 } 530 else 531 { 532 for(int i = 0; i < RENDERTARGETS; i++) 533 { 534 c[i] = oC[i]; 535 } 536 } 537 } 538 alphaTest(Int cMask[4])539 Bool PixelProgram::alphaTest(Int cMask[4]) 540 { 541 clampColor(c); 542 543 if(!state.alphaTestActive()) 544 { 545 return true; 546 } 547 548 Int aMask; 549 550 if(state.transparencyAntialiasing == TRANSPARENCY_NONE) 551 { 552 Short4 alpha = RoundShort4(c[0].w * Float4(0x1000)); 553 554 PixelRoutine::alphaTest(aMask, alpha); 555 556 for(unsigned int q = 0; q < state.multiSample; q++) 557 { 558 cMask[q] &= aMask; 559 } 560 } 561 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 562 { 563 alphaToCoverage(cMask, c[0].w); 564 } 565 else ASSERT(false); 566 567 Int pass = cMask[0]; 568 569 for(unsigned int q = 1; q < state.multiSample; q++) 570 { 571 pass = pass | cMask[q]; 572 } 573 574 return pass != 0x0; 575 } 576 rasterOperation(Float4 & fog,Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4])577 void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) 578 { 579 for(int index = 0; index < RENDERTARGETS; index++) 580 { 581 if(!state.colorWriteActive(index)) 582 { 583 continue; 584 } 585 586 if(!postBlendSRGB && state.writeSRGB && !isSRGB(index)) 587 { 588 c[index].x = linearToSRGB(c[index].x); 589 c[index].y = linearToSRGB(c[index].y); 590 c[index].z = linearToSRGB(c[index].z); 591 } 592 593 if(index == 0) 594 { 595 fogBlend(c[index], fog); 596 } 597 598 switch(state.targetFormat[index]) 599 { 600 case FORMAT_R5G6B5: 601 case FORMAT_X8R8G8B8: 602 case FORMAT_X8B8G8R8: 603 case FORMAT_A8R8G8B8: 604 case FORMAT_A8B8G8R8: 605 case FORMAT_SRGB8_X8: 606 case FORMAT_SRGB8_A8: 607 case FORMAT_A8: 608 case FORMAT_G16R16: 609 case FORMAT_A16B16G16R16: 610 for(unsigned int q = 0; q < state.multiSample; q++) 611 { 612 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 613 Vector4s color; 614 615 if(state.targetFormat[index] == FORMAT_R5G6B5) 616 { 617 color.x = UShort4(c[index].x * Float4(0xFBFF), false); 618 color.y = UShort4(c[index].y * Float4(0xFDFF), false); 619 color.z = UShort4(c[index].z * Float4(0xFBFF), false); 620 color.w = UShort4(c[index].w * Float4(0xFFFF), false); 621 } 622 else 623 { 624 color.x = convertFixed16(c[index].x, false); 625 color.y = convertFixed16(c[index].y, false); 626 color.z = convertFixed16(c[index].z, false); 627 color.w = convertFixed16(c[index].w, false); 628 } 629 630 if(state.multiSampleMask & (1 << q)) 631 { 632 alphaBlend(index, buffer, color, x); 633 logicOperation(index, buffer, color, x); 634 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 635 } 636 } 637 break; 638 case FORMAT_R32F: 639 case FORMAT_G32R32F: 640 case FORMAT_X32B32G32R32F: 641 case FORMAT_A32B32G32R32F: 642 case FORMAT_R32I: 643 case FORMAT_G32R32I: 644 case FORMAT_A32B32G32R32I: 645 case FORMAT_R32UI: 646 case FORMAT_G32R32UI: 647 case FORMAT_A32B32G32R32UI: 648 for(unsigned int q = 0; q < state.multiSample; q++) 649 { 650 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 651 Vector4f color = c[index]; 652 653 if(state.multiSampleMask & (1 << q)) 654 { 655 alphaBlend(index, buffer, color, x); 656 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 657 } 658 } 659 break; 660 default: 661 ASSERT(false); 662 } 663 } 664 } 665 sampleTexture(Vector4f & c,const Src & sampler,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & dsx,Vector4f & dsy,bool project,SamplerMethod method)666 void PixelProgram::sampleTexture(Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, SamplerMethod method) 667 { 668 Vector4f tmp; 669 670 if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID) 671 { 672 sampleTexture(tmp, sampler.index, u, v, w, q, dsx, dsy, project, method); 673 } 674 else 675 { 676 Int index = As<Int>(Float(fetchRegister(sampler).x.x)); 677 678 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) 679 { 680 if(shader->usesSampler(i)) 681 { 682 If(index == i) 683 { 684 sampleTexture(tmp, i, u, v, w, q, dsx, dsy, project, method); 685 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture 686 } 687 } 688 } 689 } 690 691 c.x = tmp[(sampler.swizzle >> 0) & 0x3]; 692 c.y = tmp[(sampler.swizzle >> 2) & 0x3]; 693 c.z = tmp[(sampler.swizzle >> 4) & 0x3]; 694 c.w = tmp[(sampler.swizzle >> 6) & 0x3]; 695 } 696 sampleTexture(Vector4f & c,int stage,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & dsx,Vector4f & dsy,bool project,SamplerMethod method)697 void PixelProgram::sampleTexture(Vector4f &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, SamplerMethod method) 698 { 699 #if PERF_PROFILE 700 Long texTime = Ticks(); 701 #endif 702 703 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture); 704 705 if(!project) 706 { 707 sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, method); 708 } 709 else 710 { 711 Float4 rq = reciprocal(q); 712 713 Float4 u_q = u * rq; 714 Float4 v_q = v * rq; 715 Float4 w_q = w * rq; 716 717 sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, method); 718 } 719 720 #if PERF_PROFILE 721 cycles[PERF_TEX] += Ticks() - texTime; 722 #endif 723 } 724 clampColor(Vector4f oC[RENDERTARGETS])725 void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS]) 726 { 727 for(int index = 0; index < RENDERTARGETS; index++) 728 { 729 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive())) 730 { 731 continue; 732 } 733 734 switch(state.targetFormat[index]) 735 { 736 case FORMAT_NULL: 737 break; 738 case FORMAT_R5G6B5: 739 case FORMAT_A8R8G8B8: 740 case FORMAT_A8B8G8R8: 741 case FORMAT_X8R8G8B8: 742 case FORMAT_X8B8G8R8: 743 case FORMAT_SRGB8_X8: 744 case FORMAT_SRGB8_A8: 745 case FORMAT_A8: 746 case FORMAT_G16R16: 747 case FORMAT_A16B16G16R16: 748 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f)); 749 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f)); 750 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f)); 751 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f)); 752 break; 753 case FORMAT_R32F: 754 case FORMAT_G32R32F: 755 case FORMAT_X32B32G32R32F: 756 case FORMAT_A32B32G32R32F: 757 case FORMAT_R32I: 758 case FORMAT_G32R32I: 759 case FORMAT_A32B32G32R32I: 760 case FORMAT_R32UI: 761 case FORMAT_G32R32UI: 762 case FORMAT_A32B32G32R32UI: 763 break; 764 default: 765 ASSERT(false); 766 } 767 } 768 } 769 enableMask(const Shader::Instruction * instruction)770 Int4 PixelProgram::enableMask(const Shader::Instruction *instruction) 771 { 772 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF); 773 774 if(!whileTest) 775 { 776 if(shader->containsBreakInstruction() && instruction->analysisBreak) 777 { 778 enable &= enableBreak; 779 } 780 781 if(shader->containsContinueInstruction() && instruction->analysisContinue) 782 { 783 enable &= enableContinue; 784 } 785 786 if(shader->containsLeaveInstruction() && instruction->analysisLeave) 787 { 788 enable &= enableLeave; 789 } 790 } 791 792 return enable; 793 } 794 fetchRegister(const Src & src,unsigned int offset)795 Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset) 796 { 797 Vector4f reg; 798 unsigned int i = src.index + offset; 799 800 switch(src.type) 801 { 802 case Shader::PARAMETER_TEMP: 803 if(src.rel.type == Shader::PARAMETER_VOID) 804 { 805 reg = r[i]; 806 } 807 else 808 { 809 Int a = relativeAddress(src, src.bufferIndex); 810 811 reg = r[i + a]; 812 } 813 break; 814 case Shader::PARAMETER_INPUT: 815 { 816 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 817 { 818 reg = v[i]; 819 } 820 else 821 { 822 Int a = relativeAddress(src, src.bufferIndex); 823 824 reg = v[i + a]; 825 } 826 } 827 break; 828 case Shader::PARAMETER_CONST: 829 reg = readConstant(src, offset); 830 break; 831 case Shader::PARAMETER_TEXTURE: 832 reg = v[2 + i]; 833 break; 834 case Shader::PARAMETER_MISCTYPE: 835 if(src.index == 0) reg = vPos; 836 if(src.index == 1) reg = vFace; 837 break; 838 case Shader::PARAMETER_SAMPLER: 839 if(src.rel.type == Shader::PARAMETER_VOID) 840 { 841 reg.x = As<Float4>(Int4(i)); 842 } 843 else if(src.rel.type == Shader::PARAMETER_TEMP) 844 { 845 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x)); 846 } 847 return reg; 848 case Shader::PARAMETER_PREDICATE: return reg; // Dummy 849 case Shader::PARAMETER_VOID: return reg; // Dummy 850 case Shader::PARAMETER_FLOAT4LITERAL: 851 reg.x = Float4(src.value[0]); 852 reg.y = Float4(src.value[1]); 853 reg.z = Float4(src.value[2]); 854 reg.w = Float4(src.value[3]); 855 break; 856 case Shader::PARAMETER_CONSTINT: return reg; // Dummy 857 case Shader::PARAMETER_CONSTBOOL: return reg; // Dummy 858 case Shader::PARAMETER_LOOP: return reg; // Dummy 859 case Shader::PARAMETER_COLOROUT: 860 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 861 { 862 reg = oC[i]; 863 } 864 else 865 { 866 Int a = relativeAddress(src, src.bufferIndex); 867 868 reg = oC[i + a]; 869 } 870 break; 871 case Shader::PARAMETER_DEPTHOUT: 872 reg.x = oDepth; 873 break; 874 default: 875 ASSERT(false); 876 } 877 878 const Float4 &x = reg[(src.swizzle >> 0) & 0x3]; 879 const Float4 &y = reg[(src.swizzle >> 2) & 0x3]; 880 const Float4 &z = reg[(src.swizzle >> 4) & 0x3]; 881 const Float4 &w = reg[(src.swizzle >> 6) & 0x3]; 882 883 Vector4f mod; 884 885 switch(src.modifier) 886 { 887 case Shader::MODIFIER_NONE: 888 mod.x = x; 889 mod.y = y; 890 mod.z = z; 891 mod.w = w; 892 break; 893 case Shader::MODIFIER_NEGATE: 894 mod.x = -x; 895 mod.y = -y; 896 mod.z = -z; 897 mod.w = -w; 898 break; 899 case Shader::MODIFIER_ABS: 900 mod.x = Abs(x); 901 mod.y = Abs(y); 902 mod.z = Abs(z); 903 mod.w = Abs(w); 904 break; 905 case Shader::MODIFIER_ABS_NEGATE: 906 mod.x = -Abs(x); 907 mod.y = -Abs(y); 908 mod.z = -Abs(z); 909 mod.w = -Abs(w); 910 break; 911 case Shader::MODIFIER_NOT: 912 mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF)); 913 mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF)); 914 mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF)); 915 mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF)); 916 break; 917 default: 918 ASSERT(false); 919 } 920 921 return mod; 922 } 923 uniformAddress(int bufferIndex,unsigned int index)924 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index) 925 { 926 if(bufferIndex == -1) 927 { 928 return data + OFFSET(DrawData, ps.c[index]); 929 } 930 else 931 { 932 return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index; 933 } 934 } 935 uniformAddress(int bufferIndex,unsigned int index,Int & offset)936 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset) 937 { 938 return uniformAddress(bufferIndex, index) + offset * sizeof(float4); 939 } 940 readConstant(const Src & src,unsigned int offset)941 Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset) 942 { 943 Vector4f c; 944 unsigned int i = src.index + offset; 945 946 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 947 { 948 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i)); 949 950 c.x = c.x.xxxx; 951 c.y = c.y.yyyy; 952 c.z = c.z.zzzz; 953 c.w = c.w.wwww; 954 955 if(shader->containsDefineInstruction()) // Constant may be known at compile time 956 { 957 for(size_t j = 0; j < shader->getLength(); j++) 958 { 959 const Shader::Instruction &instruction = *shader->getInstruction(j); 960 961 if(instruction.opcode == Shader::OPCODE_DEF) 962 { 963 if(instruction.dst.index == i) 964 { 965 c.x = Float4(instruction.src[0].value[0]); 966 c.y = Float4(instruction.src[0].value[1]); 967 c.z = Float4(instruction.src[0].value[2]); 968 c.w = Float4(instruction.src[0].value[3]); 969 970 break; 971 } 972 } 973 } 974 } 975 } 976 else if(src.rel.type == Shader::PARAMETER_LOOP) 977 { 978 Int loopCounter = aL[loopDepth]; 979 980 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter)); 981 982 c.x = c.x.xxxx; 983 c.y = c.y.yyyy; 984 c.z = c.z.zzzz; 985 c.w = c.w.wwww; 986 } 987 else 988 { 989 Int a = relativeAddress(src, src.bufferIndex); 990 991 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a)); 992 993 c.x = c.x.xxxx; 994 c.y = c.y.yyyy; 995 c.z = c.z.zzzz; 996 c.w = c.w.wwww; 997 } 998 999 return c; 1000 } 1001 relativeAddress(const Shader::Parameter & var,int bufferIndex)1002 Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex) 1003 { 1004 ASSERT(var.rel.deterministic); 1005 1006 if(var.rel.type == Shader::PARAMETER_TEMP) 1007 { 1008 return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale; 1009 } 1010 else if(var.rel.type == Shader::PARAMETER_INPUT) 1011 { 1012 return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale; 1013 } 1014 else if(var.rel.type == Shader::PARAMETER_OUTPUT) 1015 { 1016 return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale; 1017 } 1018 else if(var.rel.type == Shader::PARAMETER_CONST) 1019 { 1020 return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale; 1021 } 1022 else if(var.rel.type == Shader::PARAMETER_LOOP) 1023 { 1024 return aL[loopDepth]; 1025 } 1026 else ASSERT(false); 1027 1028 return 0; 1029 } 1030 linearToSRGB(const Float4 & x)1031 Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) 1032 { 1033 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x)); 1034 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f); 1035 1036 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f)); 1037 } 1038 M3X2(Vector4f & dst,Vector4f & src0,const Src & src1)1039 void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1) 1040 { 1041 Vector4f row0 = fetchRegister(src1, 0); 1042 Vector4f row1 = fetchRegister(src1, 1); 1043 1044 dst.x = dot3(src0, row0); 1045 dst.y = dot3(src0, row1); 1046 } 1047 M3X3(Vector4f & dst,Vector4f & src0,const Src & src1)1048 void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1) 1049 { 1050 Vector4f row0 = fetchRegister(src1, 0); 1051 Vector4f row1 = fetchRegister(src1, 1); 1052 Vector4f row2 = fetchRegister(src1, 2); 1053 1054 dst.x = dot3(src0, row0); 1055 dst.y = dot3(src0, row1); 1056 dst.z = dot3(src0, row2); 1057 } 1058 M3X4(Vector4f & dst,Vector4f & src0,const Src & src1)1059 void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1) 1060 { 1061 Vector4f row0 = fetchRegister(src1, 0); 1062 Vector4f row1 = fetchRegister(src1, 1); 1063 Vector4f row2 = fetchRegister(src1, 2); 1064 Vector4f row3 = fetchRegister(src1, 3); 1065 1066 dst.x = dot3(src0, row0); 1067 dst.y = dot3(src0, row1); 1068 dst.z = dot3(src0, row2); 1069 dst.w = dot3(src0, row3); 1070 } 1071 M4X3(Vector4f & dst,Vector4f & src0,const Src & src1)1072 void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1) 1073 { 1074 Vector4f row0 = fetchRegister(src1, 0); 1075 Vector4f row1 = fetchRegister(src1, 1); 1076 Vector4f row2 = fetchRegister(src1, 2); 1077 1078 dst.x = dot4(src0, row0); 1079 dst.y = dot4(src0, row1); 1080 dst.z = dot4(src0, row2); 1081 } 1082 M4X4(Vector4f & dst,Vector4f & src0,const Src & src1)1083 void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1) 1084 { 1085 Vector4f row0 = fetchRegister(src1, 0); 1086 Vector4f row1 = fetchRegister(src1, 1); 1087 Vector4f row2 = fetchRegister(src1, 2); 1088 Vector4f row3 = fetchRegister(src1, 3); 1089 1090 dst.x = dot4(src0, row0); 1091 dst.y = dot4(src0, row1); 1092 dst.z = dot4(src0, row2); 1093 dst.w = dot4(src0, row3); 1094 } 1095 TEXLD(Vector4f & dst,Vector4f & src0,const Src & src1,bool project,bool bias)1096 void PixelProgram::TEXLD(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias) 1097 { 1098 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias ? Bias : Implicit); 1099 } 1100 TEXOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2,Vector4f & src3,bool project,bool bias)1101 void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, bool project, bool bias) 1102 { 1103 UNIMPLEMENTED(); 1104 } 1105 TEXLDL(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,bool project,bool bias)1106 void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, bool project, bool bias) 1107 { 1108 UNIMPLEMENTED(); 1109 } 1110 TEXELFETCH(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2)1111 void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2) 1112 { 1113 UNIMPLEMENTED(); 1114 } 1115 TEXELFETCH(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2,Vector4f & offset)1116 void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &offset) 1117 { 1118 UNIMPLEMENTED(); 1119 } 1120 TEXGRAD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2,Vector4f & src3)1121 void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3) 1122 { 1123 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, false, Grad); 1124 } 1125 TEXGRAD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2,Vector4f & src3,Vector4f & offset)1126 void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset) 1127 { 1128 UNIMPLEMENTED(); 1129 } 1130 TEXLDD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2,Vector4f & src3,bool project)1131 void PixelProgram::TEXLDD(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3, bool project) 1132 { 1133 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, project, Grad); 1134 } 1135 TEXLDL(Vector4f & dst,Vector4f & src0,const Src & src1,bool project)1136 void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, bool project) 1137 { 1138 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, Lod); 1139 } 1140 TEXSIZE(Vector4f & dst,Float4 & lod,const Src & src1)1141 void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1) 1142 { 1143 Pointer<Byte> textureMipmap = data + OFFSET(DrawData, mipmap) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap); 1144 for(int i = 0; i < 4; ++i) 1145 { 1146 Pointer<Byte> mipmap = textureMipmap + (As<Int>(Extract(lod, i)) + Int(1)) * sizeof(Mipmap); 1147 dst.x = Insert(dst.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i); 1148 dst.y = Insert(dst.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i); 1149 dst.z = Insert(dst.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i); 1150 } 1151 } 1152 TEXKILL(Int cMask[4],Vector4f & src,unsigned char mask)1153 void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask) 1154 { 1155 Int kill = -1; 1156 1157 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f))); 1158 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f))); 1159 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f))); 1160 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f))); 1161 1162 // FIXME: Dynamic branching affects TEXKILL? 1163 // if(shader->containsDynamicBranching()) 1164 // { 1165 // kill = ~SignMask(enableMask()); 1166 // } 1167 1168 for(unsigned int q = 0; q < state.multiSample; q++) 1169 { 1170 cMask[q] &= kill; 1171 } 1172 1173 // FIXME: Branch to end of shader if all killed? 1174 } 1175 DISCARD(Int cMask[4],const Shader::Instruction * instruction)1176 void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction) 1177 { 1178 Int kill = 0; 1179 1180 if(shader->containsDynamicBranching()) 1181 { 1182 kill = ~SignMask(enableMask(instruction)); 1183 } 1184 1185 for(unsigned int q = 0; q < state.multiSample; q++) 1186 { 1187 cMask[q] &= kill; 1188 } 1189 1190 // FIXME: Branch to end of shader if all killed? 1191 } 1192 DFDX(Vector4f & dst,Vector4f & src)1193 void PixelProgram::DFDX(Vector4f &dst, Vector4f &src) 1194 { 1195 dst.x = src.x.yyww - src.x.xxzz; 1196 dst.y = src.y.yyww - src.y.xxzz; 1197 dst.z = src.z.yyww - src.z.xxzz; 1198 dst.w = src.w.yyww - src.w.xxzz; 1199 } 1200 DFDY(Vector4f & dst,Vector4f & src)1201 void PixelProgram::DFDY(Vector4f &dst, Vector4f &src) 1202 { 1203 dst.x = src.x.zwzw - src.x.xyxy; 1204 dst.y = src.y.zwzw - src.y.xyxy; 1205 dst.z = src.z.zwzw - src.z.xyxy; 1206 dst.w = src.w.zwzw - src.w.xyxy; 1207 } 1208 FWIDTH(Vector4f & dst,Vector4f & src)1209 void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src) 1210 { 1211 // abs(dFdx(src)) + abs(dFdy(src)); 1212 dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy); 1213 dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy); 1214 dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy); 1215 dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy); 1216 } 1217 BREAK()1218 void PixelProgram::BREAK() 1219 { 1220 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock(); 1221 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; 1222 1223 if(breakDepth == 0) 1224 { 1225 enableIndex = enableIndex - breakDepth; 1226 Nucleus::createBr(endBlock); 1227 } 1228 else 1229 { 1230 enableBreak = enableBreak & ~enableStack[enableIndex]; 1231 Bool allBreak = SignMask(enableBreak) == 0x0; 1232 1233 enableIndex = enableIndex - breakDepth; 1234 branch(allBreak, endBlock, deadBlock); 1235 } 1236 1237 Nucleus::setInsertBlock(deadBlock); 1238 enableIndex = enableIndex + breakDepth; 1239 } 1240 BREAKC(Vector4f & src0,Vector4f & src1,Control control)1241 void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control) 1242 { 1243 Int4 condition; 1244 1245 switch(control) 1246 { 1247 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 1248 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 1249 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 1250 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 1251 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 1252 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 1253 default: 1254 ASSERT(false); 1255 } 1256 1257 BREAK(condition); 1258 } 1259 BREAKP(const Src & predicateRegister)1260 void PixelProgram::BREAKP(const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC 1261 { 1262 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1263 1264 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1265 { 1266 condition = ~condition; 1267 } 1268 1269 BREAK(condition); 1270 } 1271 BREAK(Int4 & condition)1272 void PixelProgram::BREAK(Int4 &condition) 1273 { 1274 condition &= enableStack[enableIndex]; 1275 1276 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock(); 1277 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; 1278 1279 enableBreak = enableBreak & ~condition; 1280 Bool allBreak = SignMask(enableBreak) == 0x0; 1281 1282 enableIndex = enableIndex - breakDepth; 1283 branch(allBreak, endBlock, continueBlock); 1284 1285 Nucleus::setInsertBlock(continueBlock); 1286 enableIndex = enableIndex + breakDepth; 1287 } 1288 CONTINUE()1289 void PixelProgram::CONTINUE() 1290 { 1291 enableContinue = enableContinue & ~enableStack[enableIndex]; 1292 } 1293 TEST()1294 void PixelProgram::TEST() 1295 { 1296 whileTest = true; 1297 } 1298 CALL(int labelIndex,int callSiteIndex)1299 void PixelProgram::CALL(int labelIndex, int callSiteIndex) 1300 { 1301 if(!labelBlock[labelIndex]) 1302 { 1303 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1304 } 1305 1306 if(callRetBlock[labelIndex].size() > 1) 1307 { 1308 callStack[stackIndex++] = UInt(callSiteIndex); 1309 } 1310 1311 Int4 restoreLeave = enableLeave; 1312 1313 Nucleus::createBr(labelBlock[labelIndex]); 1314 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1315 1316 enableLeave = restoreLeave; 1317 } 1318 CALLNZ(int labelIndex,int callSiteIndex,const Src & src)1319 void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src) 1320 { 1321 if(src.type == Shader::PARAMETER_CONSTBOOL) 1322 { 1323 CALLNZb(labelIndex, callSiteIndex, src); 1324 } 1325 else if(src.type == Shader::PARAMETER_PREDICATE) 1326 { 1327 CALLNZp(labelIndex, callSiteIndex, src); 1328 } 1329 else ASSERT(false); 1330 } 1331 CALLNZb(int labelIndex,int callSiteIndex,const Src & boolRegister)1332 void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister) 1333 { 1334 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME 1335 1336 if(boolRegister.modifier == Shader::MODIFIER_NOT) 1337 { 1338 condition = !condition; 1339 } 1340 1341 if(!labelBlock[labelIndex]) 1342 { 1343 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1344 } 1345 1346 if(callRetBlock[labelIndex].size() > 1) 1347 { 1348 callStack[stackIndex++] = UInt(callSiteIndex); 1349 } 1350 1351 Int4 restoreLeave = enableLeave; 1352 1353 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); 1354 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1355 1356 enableLeave = restoreLeave; 1357 } 1358 CALLNZp(int labelIndex,int callSiteIndex,const Src & predicateRegister)1359 void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister) 1360 { 1361 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1362 1363 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1364 { 1365 condition = ~condition; 1366 } 1367 1368 condition &= enableStack[enableIndex]; 1369 1370 if(!labelBlock[labelIndex]) 1371 { 1372 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1373 } 1374 1375 if(callRetBlock[labelIndex].size() > 1) 1376 { 1377 callStack[stackIndex++] = UInt(callSiteIndex); 1378 } 1379 1380 enableIndex++; 1381 enableStack[enableIndex] = condition; 1382 Int4 restoreLeave = enableLeave; 1383 1384 Bool notAllFalse = SignMask(condition) != 0; 1385 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); 1386 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1387 1388 enableIndex--; 1389 enableLeave = restoreLeave; 1390 } 1391 ELSE()1392 void PixelProgram::ELSE() 1393 { 1394 ifDepth--; 1395 1396 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth]; 1397 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); 1398 1399 if(isConditionalIf[ifDepth]) 1400 { 1401 Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1]; 1402 Bool notAllFalse = SignMask(condition) != 0; 1403 1404 branch(notAllFalse, falseBlock, endBlock); 1405 1406 enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1]; 1407 } 1408 else 1409 { 1410 Nucleus::createBr(endBlock); 1411 Nucleus::setInsertBlock(falseBlock); 1412 } 1413 1414 ifFalseBlock[ifDepth] = endBlock; 1415 1416 ifDepth++; 1417 } 1418 ENDIF()1419 void PixelProgram::ENDIF() 1420 { 1421 ifDepth--; 1422 1423 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth]; 1424 1425 Nucleus::createBr(endBlock); 1426 Nucleus::setInsertBlock(endBlock); 1427 1428 if(isConditionalIf[ifDepth]) 1429 { 1430 breakDepth--; 1431 enableIndex--; 1432 } 1433 } 1434 ENDLOOP()1435 void PixelProgram::ENDLOOP() 1436 { 1437 loopRepDepth--; 1438 1439 aL[loopDepth] = aL[loopDepth] + increment[loopDepth]; // FIXME: += 1440 1441 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1442 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1443 1444 Nucleus::createBr(testBlock); 1445 Nucleus::setInsertBlock(endBlock); 1446 1447 loopDepth--; 1448 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1449 } 1450 ENDREP()1451 void PixelProgram::ENDREP() 1452 { 1453 loopRepDepth--; 1454 1455 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1456 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1457 1458 Nucleus::createBr(testBlock); 1459 Nucleus::setInsertBlock(endBlock); 1460 1461 loopDepth--; 1462 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1463 } 1464 ENDWHILE()1465 void PixelProgram::ENDWHILE() 1466 { 1467 loopRepDepth--; 1468 1469 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1470 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1471 1472 Nucleus::createBr(testBlock); 1473 Nucleus::setInsertBlock(endBlock); 1474 1475 enableIndex--; 1476 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1477 whileTest = false; 1478 } 1479 ENDSWITCH()1480 void PixelProgram::ENDSWITCH() 1481 { 1482 loopRepDepth--; 1483 1484 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1485 1486 Nucleus::createBr(loopRepEndBlock[loopRepDepth]); 1487 Nucleus::setInsertBlock(endBlock); 1488 1489 enableIndex--; 1490 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1491 } 1492 IF(const Src & src)1493 void PixelProgram::IF(const Src &src) 1494 { 1495 if(src.type == Shader::PARAMETER_CONSTBOOL) 1496 { 1497 IFb(src); 1498 } 1499 else if(src.type == Shader::PARAMETER_PREDICATE) 1500 { 1501 IFp(src); 1502 } 1503 else 1504 { 1505 Int4 condition = As<Int4>(fetchRegister(src).x); 1506 IF(condition); 1507 } 1508 } 1509 IFb(const Src & boolRegister)1510 void PixelProgram::IFb(const Src &boolRegister) 1511 { 1512 ASSERT(ifDepth < 24 + 4); 1513 1514 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME 1515 1516 if(boolRegister.modifier == Shader::MODIFIER_NOT) 1517 { 1518 condition = !condition; 1519 } 1520 1521 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); 1522 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); 1523 1524 branch(condition, trueBlock, falseBlock); 1525 1526 isConditionalIf[ifDepth] = false; 1527 ifFalseBlock[ifDepth] = falseBlock; 1528 1529 ifDepth++; 1530 } 1531 IFp(const Src & predicateRegister)1532 void PixelProgram::IFp(const Src &predicateRegister) 1533 { 1534 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1535 1536 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1537 { 1538 condition = ~condition; 1539 } 1540 1541 IF(condition); 1542 } 1543 IFC(Vector4f & src0,Vector4f & src1,Control control)1544 void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control) 1545 { 1546 Int4 condition; 1547 1548 switch(control) 1549 { 1550 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 1551 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 1552 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 1553 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 1554 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 1555 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 1556 default: 1557 ASSERT(false); 1558 } 1559 1560 IF(condition); 1561 } 1562 IF(Int4 & condition)1563 void PixelProgram::IF(Int4 &condition) 1564 { 1565 condition &= enableStack[enableIndex]; 1566 1567 enableIndex++; 1568 enableStack[enableIndex] = condition; 1569 1570 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); 1571 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); 1572 1573 Bool notAllFalse = SignMask(condition) != 0; 1574 1575 branch(notAllFalse, trueBlock, falseBlock); 1576 1577 isConditionalIf[ifDepth] = true; 1578 ifFalseBlock[ifDepth] = falseBlock; 1579 1580 ifDepth++; 1581 breakDepth++; 1582 } 1583 LABEL(int labelIndex)1584 void PixelProgram::LABEL(int labelIndex) 1585 { 1586 if(!labelBlock[labelIndex]) 1587 { 1588 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1589 } 1590 1591 Nucleus::setInsertBlock(labelBlock[labelIndex]); 1592 currentLabel = labelIndex; 1593 } 1594 LOOP(const Src & integerRegister)1595 void PixelProgram::LOOP(const Src &integerRegister) 1596 { 1597 loopDepth++; 1598 1599 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); 1600 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1])); 1601 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2])); 1602 1603 // If(increment[loopDepth] == 0) 1604 // { 1605 // increment[loopDepth] = 1; 1606 // } 1607 1608 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1609 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock(); 1610 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); 1611 1612 loopRepTestBlock[loopRepDepth] = testBlock; 1613 loopRepEndBlock[loopRepDepth] = endBlock; 1614 1615 // FIXME: jump(testBlock) 1616 Nucleus::createBr(testBlock); 1617 Nucleus::setInsertBlock(testBlock); 1618 1619 branch(iteration[loopDepth] > 0, loopBlock, endBlock); 1620 Nucleus::setInsertBlock(loopBlock); 1621 1622 iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- 1623 1624 loopRepDepth++; 1625 breakDepth = 0; 1626 } 1627 REP(const Src & integerRegister)1628 void PixelProgram::REP(const Src &integerRegister) 1629 { 1630 loopDepth++; 1631 1632 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); 1633 aL[loopDepth] = aL[loopDepth - 1]; 1634 1635 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1636 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock(); 1637 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); 1638 1639 loopRepTestBlock[loopRepDepth] = testBlock; 1640 loopRepEndBlock[loopRepDepth] = endBlock; 1641 1642 // FIXME: jump(testBlock) 1643 Nucleus::createBr(testBlock); 1644 Nucleus::setInsertBlock(testBlock); 1645 1646 branch(iteration[loopDepth] > 0, loopBlock, endBlock); 1647 Nucleus::setInsertBlock(loopBlock); 1648 1649 iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- 1650 1651 loopRepDepth++; 1652 breakDepth = 0; 1653 } 1654 WHILE(const Src & temporaryRegister)1655 void PixelProgram::WHILE(const Src &temporaryRegister) 1656 { 1657 enableIndex++; 1658 1659 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1660 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock(); 1661 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); 1662 1663 loopRepTestBlock[loopRepDepth] = testBlock; 1664 loopRepEndBlock[loopRepDepth] = endBlock; 1665 1666 Int4 restoreBreak = enableBreak; 1667 Int4 restoreContinue = enableContinue; 1668 1669 // FIXME: jump(testBlock) 1670 Nucleus::createBr(testBlock); 1671 Nucleus::setInsertBlock(testBlock); 1672 enableContinue = restoreContinue; 1673 1674 const Vector4f &src = fetchRegister(temporaryRegister); 1675 Int4 condition = As<Int4>(src.x); 1676 condition &= enableStack[enableIndex - 1]; 1677 if(shader->containsLeaveInstruction()) condition &= enableLeave; 1678 enableStack[enableIndex] = condition; 1679 1680 Bool notAllFalse = SignMask(condition) != 0; 1681 branch(notAllFalse, loopBlock, endBlock); 1682 1683 Nucleus::setInsertBlock(endBlock); 1684 enableBreak = restoreBreak; 1685 1686 Nucleus::setInsertBlock(loopBlock); 1687 1688 loopRepDepth++; 1689 breakDepth = 0; 1690 } 1691 SWITCH()1692 void PixelProgram::SWITCH() 1693 { 1694 enableIndex++; 1695 enableStack[enableIndex] = Int4(0xFFFFFFFF); 1696 1697 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); 1698 1699 loopRepTestBlock[loopRepDepth] = nullptr; 1700 loopRepEndBlock[loopRepDepth] = endBlock; 1701 1702 loopRepDepth++; 1703 breakDepth = 0; 1704 } 1705 RET()1706 void PixelProgram::RET() 1707 { 1708 if(currentLabel == -1) 1709 { 1710 returnBlock = Nucleus::createBasicBlock(); 1711 Nucleus::createBr(returnBlock); 1712 } 1713 else 1714 { 1715 llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock(); 1716 1717 if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack 1718 { 1719 // FIXME: Encapsulate 1720 UInt index = callStack[--stackIndex]; 1721 1722 llvm::Value *value = index.loadValue(); 1723 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size()); 1724 1725 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++) 1726 { 1727 Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]); 1728 } 1729 } 1730 else if(callRetBlock[currentLabel].size() == 1) // Jump directly to the unique return destination 1731 { 1732 Nucleus::createBr(callRetBlock[currentLabel][0]); 1733 } 1734 else // Function isn't called 1735 { 1736 Nucleus::createBr(unreachableBlock); 1737 } 1738 1739 Nucleus::setInsertBlock(unreachableBlock); 1740 Nucleus::createUnreachable(); 1741 } 1742 } 1743 LEAVE()1744 void PixelProgram::LEAVE() 1745 { 1746 enableLeave = enableLeave & ~enableStack[enableIndex]; 1747 1748 // FIXME: Return from function if all instances left 1749 // FIXME: Use enableLeave in other control-flow constructs 1750 } 1751 } 1752