1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "PixelProgram.hpp"
16 
17 #include "SamplerCore.hpp"
18 #include "Renderer/Primitive.hpp"
19 #include "Renderer/Renderer.hpp"
20 
21 namespace sw
22 {
23 	extern bool postBlendSRGB;
24 	extern bool booleanFaceRegister;
25 	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
26 	extern bool fullPixelPositionRegister;
27 
setBuiltins(Int & x,Int & y,Float4 (& z)[4],Float4 & w)28 	void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
29 	{
30 		if(shader->getShaderModel() >= 0x0300)
31 		{
32 			if(shader->isVPosDeclared())
33 			{
34 				if(!halfIntegerCoordinates)
35 				{
36 					vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
37 					vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
38 				}
39 				else
40 				{
41 					vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);
42 					vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
43 				}
44 
45 				if(fullPixelPositionRegister)
46 				{
47 					vPos.z = z[0]; // FIXME: Centroid?
48 					vPos.w = w;    // FIXME: Centroid?
49 				}
50 			}
51 
52 			if(shader->isVFaceDeclared())
53 			{
54 				Float4 face = *Pointer<Float>(primitive + OFFSET(Primitive, area));
55 
56 				if(booleanFaceRegister)
57 				{
58 					face = As<Float4>(state.frontFaceCCW ? CmpNLT(face, Float4(0.0f)) : CmpLT(face, Float4(0.0f)));
59 				}
60 
61 				vFace.x = face;
62 				vFace.y = face;
63 				vFace.z = face;
64 				vFace.w = face;
65 			}
66 		}
67 	}
68 
applyShader(Int cMask[4])69 	void PixelProgram::applyShader(Int cMask[4])
70 	{
71 		enableIndex = 0;
72 		stackIndex = 0;
73 
74 		if(shader->containsLeaveInstruction())
75 		{
76 			enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
77 		}
78 
79 		for(int i = 0; i < RENDERTARGETS; i++)
80 		{
81 			if(state.targetFormat[i] != FORMAT_NULL)
82 			{
83 				oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f);
84 			}
85 		}
86 
87 		// Create all call site return blocks up front
88 		for(size_t i = 0; i < shader->getLength(); i++)
89 		{
90 			const Shader::Instruction *instruction = shader->getInstruction(i);
91 			Shader::Opcode opcode = instruction->opcode;
92 
93 			if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
94 			{
95 				const Dst &dst = instruction->dst;
96 
97 				ASSERT(callRetBlock[dst.label].size() == dst.callSite);
98 				callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
99 			}
100 		}
101 
102 		bool broadcastColor0 = true;
103 
104 		for(size_t i = 0; i < shader->getLength(); i++)
105 		{
106 			const Shader::Instruction *instruction = shader->getInstruction(i);
107 			Shader::Opcode opcode = instruction->opcode;
108 
109 			if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
110 			{
111 				continue;
112 			}
113 
114 			const Dst &dst = instruction->dst;
115 			const Src &src0 = instruction->src[0];
116 			const Src &src1 = instruction->src[1];
117 			const Src &src2 = instruction->src[2];
118 			const Src &src3 = instruction->src[3];
119 			const Src &src4 = instruction->src[4];
120 
121 			bool predicate = instruction->predicate;
122 			Control control = instruction->control;
123 			bool pp = dst.partialPrecision;
124 			bool project = instruction->project;
125 			bool bias = instruction->bias;
126 
127 			Vector4f d;
128 			Vector4f s0;
129 			Vector4f s1;
130 			Vector4f s2;
131 			Vector4f s3;
132 			Vector4f s4;
133 
134 			if(opcode == Shader::OPCODE_TEXKILL)   // Takes destination as input
135 			{
136 				if(dst.type == Shader::PARAMETER_TEXTURE)
137 				{
138 					d.x = v[2 + dst.index].x;
139 					d.y = v[2 + dst.index].y;
140 					d.z = v[2 + dst.index].z;
141 					d.w = v[2 + dst.index].w;
142 				}
143 				else
144 				{
145 					d = r[dst.index];
146 				}
147 			}
148 
149 			if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
150 			if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
151 			if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
152 			if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
153 			if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
154 
155 			switch(opcode)
156 			{
157 			case Shader::OPCODE_PS_2_0:                                                    break;
158 			case Shader::OPCODE_PS_2_x:                                                    break;
159 			case Shader::OPCODE_PS_3_0:                                                    break;
160 			case Shader::OPCODE_DEF:                                                       break;
161 			case Shader::OPCODE_DCL:                                                       break;
162 			case Shader::OPCODE_NOP:                                                       break;
163 			case Shader::OPCODE_MOV:        mov(d, s0);                                    break;
164 			case Shader::OPCODE_NEG:        neg(d, s0);                                    break;
165 			case Shader::OPCODE_INEG:       ineg(d, s0);                                   break;
166 			case Shader::OPCODE_F2B:        f2b(d, s0);                                    break;
167 			case Shader::OPCODE_B2F:        b2f(d, s0);                                    break;
168 			case Shader::OPCODE_F2I:        f2i(d, s0);                                    break;
169 			case Shader::OPCODE_I2F:        i2f(d, s0);                                    break;
170 			case Shader::OPCODE_F2U:        f2u(d, s0);                                    break;
171 			case Shader::OPCODE_U2F:        u2f(d, s0);                                    break;
172 			case Shader::OPCODE_I2B:        i2b(d, s0);                                    break;
173 			case Shader::OPCODE_B2I:        b2i(d, s0);                                    break;
174 			case Shader::OPCODE_ADD:        add(d, s0, s1);                                break;
175 			case Shader::OPCODE_IADD:       iadd(d, s0, s1);                               break;
176 			case Shader::OPCODE_SUB:        sub(d, s0, s1);                                break;
177 			case Shader::OPCODE_ISUB:       isub(d, s0, s1);                               break;
178 			case Shader::OPCODE_MUL:        mul(d, s0, s1);                                break;
179 			case Shader::OPCODE_IMUL:       imul(d, s0, s1);                               break;
180 			case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);                            break;
181 			case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);                           break;
182 			case Shader::OPCODE_DP1:        dp1(d, s0, s1);                                break;
183 			case Shader::OPCODE_DP2:        dp2(d, s0, s1);                                break;
184 			case Shader::OPCODE_DP2ADD:     dp2add(d, s0, s1, s2);                         break;
185 			case Shader::OPCODE_DP3:        dp3(d, s0, s1);                                break;
186 			case Shader::OPCODE_DP4:        dp4(d, s0, s1);                                break;
187 			case Shader::OPCODE_DET2:       det2(d, s0, s1);                               break;
188 			case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);                           break;
189 			case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);                       break;
190 			case Shader::OPCODE_CMP0:       cmp0(d, s0, s1, s2);                           break;
191 			case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);                      break;
192 			case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);                      break;
193 			case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);                         break;
194 			case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                        break;
195 			case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);                     break;
196 			case Shader::OPCODE_FRC:        frc(d, s0);                                    break;
197 			case Shader::OPCODE_TRUNC:      trunc(d, s0);                                  break;
198 			case Shader::OPCODE_FLOOR:      floor(d, s0);                                  break;
199 			case Shader::OPCODE_ROUND:      round(d, s0);                                  break;
200 			case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);                              break;
201 			case Shader::OPCODE_CEIL:       ceil(d, s0);                                   break;
202 			case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);                              break;
203 			case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                               break;
204 			case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);                              break;
205 			case Shader::OPCODE_LOG2:       log2(d, s0, pp);                               break;
206 			case Shader::OPCODE_EXP:        exp(d, s0, pp);                                break;
207 			case Shader::OPCODE_LOG:        log(d, s0, pp);                                break;
208 			case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                               break;
209 			case Shader::OPCODE_DIV:        div(d, s0, s1);                                break;
210 			case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                               break;
211 			case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                               break;
212 			case Shader::OPCODE_MOD:        mod(d, s0, s1);                                break;
213 			case Shader::OPCODE_IMOD:       imod(d, s0, s1);                               break;
214 			case Shader::OPCODE_UMOD:       umod(d, s0, s1);                               break;
215 			case Shader::OPCODE_SHL:        shl(d, s0, s1);                                break;
216 			case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                               break;
217 			case Shader::OPCODE_USHR:       ushr(d, s0, s1);                               break;
218 			case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                               break;
219 			case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                               break;
220 			case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                                break;
221 			case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);                             break;
222 			case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);                             break;
223 			case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);                             break;
224 			case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);                        break;
225 			case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);                        break;
226 			case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);                        break;
227 			case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);                        break;
228 			case Shader::OPCODE_MIN:        min(d, s0, s1);                                break;
229 			case Shader::OPCODE_IMIN:       imin(d, s0, s1);                               break;
230 			case Shader::OPCODE_UMIN:       umin(d, s0, s1);                               break;
231 			case Shader::OPCODE_MAX:        max(d, s0, s1);                                break;
232 			case Shader::OPCODE_IMAX:       imax(d, s0, s1);                               break;
233 			case Shader::OPCODE_UMAX:       umax(d, s0, s1);                               break;
234 			case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);                            break;
235 			case Shader::OPCODE_STEP:       step(d, s0, s1);                               break;
236 			case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);                         break;
237 			case Shader::OPCODE_ISINF:      isinf(d, s0);                                  break;
238 			case Shader::OPCODE_ISNAN:      isnan(d, s0);                                  break;
239 			case Shader::OPCODE_FLOATBITSTOINT:
240 			case Shader::OPCODE_FLOATBITSTOUINT:
241 			case Shader::OPCODE_INTBITSTOFLOAT:
242 			case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                                   break;
243 			case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);                     break;
244 			case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);                     break;
245 			case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);                      break;
246 			case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);                   break;
247 			case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);                   break;
248 			case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);                    break;
249 			case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);                           break;
250 			case Shader::OPCODE_POW:        pow(d, s0, s1, pp);                            break;
251 			case Shader::OPCODE_SGN:        sgn(d, s0);                                    break;
252 			case Shader::OPCODE_ISGN:       isgn(d, s0);                                   break;
253 			case Shader::OPCODE_CRS:        crs(d, s0, s1);                                break;
254 			case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                       break;
255 			case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                       break;
256 			case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                       break;
257 			case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                       break;
258 			case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                           break;
259 			case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                           break;
260 			case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                           break;
261 			case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                           break;
262 			case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);                     break;
263 			case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);                     break;
264 			case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);                     break;
265 			case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);                     break;
266 			case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                               break;
267 			case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                               break;
268 			case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                               break;
269 			case Shader::OPCODE_ABS:        abs(d, s0);                                    break;
270 			case Shader::OPCODE_IABS:       iabs(d, s0);                                   break;
271 			case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);                             break;
272 			case Shader::OPCODE_COS:        cos(d, s0, pp);                                break;
273 			case Shader::OPCODE_SIN:        sin(d, s0, pp);                                break;
274 			case Shader::OPCODE_TAN:        tan(d, s0, pp);                                break;
275 			case Shader::OPCODE_ACOS:       acos(d, s0, pp);                               break;
276 			case Shader::OPCODE_ASIN:       asin(d, s0, pp);                               break;
277 			case Shader::OPCODE_ATAN:       atan(d, s0, pp);                               break;
278 			case Shader::OPCODE_ATAN2:      atan2(d, s0, s1, pp);                          break;
279 			case Shader::OPCODE_COSH:       cosh(d, s0, pp);                               break;
280 			case Shader::OPCODE_SINH:       sinh(d, s0, pp);                               break;
281 			case Shader::OPCODE_TANH:       tanh(d, s0, pp);                               break;
282 			case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);                              break;
283 			case Shader::OPCODE_ASINH:      asinh(d, s0, pp);                              break;
284 			case Shader::OPCODE_ATANH:      atanh(d, s0, pp);                              break;
285 			case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);                             break;
286 			case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);                             break;
287 			case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);                             break;
288 			case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);                             break;
289 			case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);                             break;
290 			case Shader::OPCODE_TEX:        TEX(d, s0, src1, project, bias);               break;
291 			case Shader::OPCODE_TEXLDD:     TEXGRAD(d, s0, src1, s2, s3);                  break;
292 			case Shader::OPCODE_TEXLDL:     TEXLOD(d, s0, src1, s0.w);                     break;
293 			case Shader::OPCODE_TEXLOD:     TEXLOD(d, s0, src1, s2.x);                     break;
294 			case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);                        break;
295 			case Shader::OPCODE_TEXKILL:    TEXKILL(cMask, d, dst.mask);                   break;
296 			case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2);                    break;
297 			case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x);         break;
298 			case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x);                 break;
299 			case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break;
300 			case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);                  break;
301 			case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4);     break;
302 			case Shader::OPCODE_TEXBIAS:    TEXBIAS(d, s0, src1, s2.x);                    break;
303 			case Shader::OPCODE_TEXOFFSETBIAS: TEXOFFSETBIAS(d, s0, src1, s2, s3.x);       break;
304 			case Shader::OPCODE_DISCARD:    DISCARD(cMask, instruction);                   break;
305 			case Shader::OPCODE_DFDX:       DFDX(d, s0);                                   break;
306 			case Shader::OPCODE_DFDY:       DFDY(d, s0);                                   break;
307 			case Shader::OPCODE_FWIDTH:     FWIDTH(d, s0);                                 break;
308 			case Shader::OPCODE_BREAK:      BREAK();                                       break;
309 			case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);                       break;
310 			case Shader::OPCODE_BREAKP:     BREAKP(src0);                                  break;
311 			case Shader::OPCODE_CONTINUE:   CONTINUE();                                    break;
312 			case Shader::OPCODE_TEST:       TEST();                                        break;
313 			case Shader::OPCODE_SCALAR:     SCALAR();                                      break;
314 			case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);                 break;
315 			case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0);         break;
316 			case Shader::OPCODE_ELSE:       ELSE();                                        break;
317 			case Shader::OPCODE_ENDIF:      ENDIF();                                       break;
318 			case Shader::OPCODE_ENDLOOP:    ENDLOOP();                                     break;
319 			case Shader::OPCODE_ENDREP:     ENDREP();                                      break;
320 			case Shader::OPCODE_ENDWHILE:   ENDWHILE();                                    break;
321 			case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                                   break;
322 			case Shader::OPCODE_IF:         IF(src0);                                      break;
323 			case Shader::OPCODE_IFC:        IFC(s0, s1, control);                          break;
324 			case Shader::OPCODE_LABEL:      LABEL(dst.index);                              break;
325 			case Shader::OPCODE_LOOP:       LOOP(src1);                                    break;
326 			case Shader::OPCODE_REP:        REP(src0);                                     break;
327 			case Shader::OPCODE_WHILE:      WHILE(src0);                                   break;
328 			case Shader::OPCODE_SWITCH:     SWITCH();                                      break;
329 			case Shader::OPCODE_RET:        RET();                                         break;
330 			case Shader::OPCODE_LEAVE:      LEAVE();                                       break;
331 			case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);                       break;
332 			case Shader::OPCODE_ALL:        all(d.x, s0);                                  break;
333 			case Shader::OPCODE_ANY:        any(d.x, s0);                                  break;
334 			case Shader::OPCODE_NOT:        bitwise_not(d, s0);                            break;
335 			case Shader::OPCODE_OR:         bitwise_or(d, s0, s1);                         break;
336 			case Shader::OPCODE_XOR:        bitwise_xor(d, s0, s1);                        break;
337 			case Shader::OPCODE_AND:        bitwise_and(d, s0, s1);                        break;
338 			case Shader::OPCODE_EQ:         equal(d, s0, s1);                              break;
339 			case Shader::OPCODE_NE:         notEqual(d, s0, s1);                           break;
340 			case Shader::OPCODE_END:                                                       break;
341 			default:
342 				ASSERT(false);
343 			}
344 
345 			if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
346 			{
347 				if(dst.saturate)
348 				{
349 					if(dst.x) d.x = Max(d.x, Float4(0.0f));
350 					if(dst.y) d.y = Max(d.y, Float4(0.0f));
351 					if(dst.z) d.z = Max(d.z, Float4(0.0f));
352 					if(dst.w) d.w = Max(d.w, Float4(0.0f));
353 
354 					if(dst.x) d.x = Min(d.x, Float4(1.0f));
355 					if(dst.y) d.y = Min(d.y, Float4(1.0f));
356 					if(dst.z) d.z = Min(d.z, Float4(1.0f));
357 					if(dst.w) d.w = Min(d.w, Float4(1.0f));
358 				}
359 
360 				if(instruction->isPredicated())
361 				{
362 					Vector4f pDst;   // FIXME: Rename
363 
364 					switch(dst.type)
365 					{
366 					case Shader::PARAMETER_TEMP:
367 						if(dst.rel.type == Shader::PARAMETER_VOID)
368 						{
369 							if(dst.x) pDst.x = r[dst.index].x;
370 							if(dst.y) pDst.y = r[dst.index].y;
371 							if(dst.z) pDst.z = r[dst.index].z;
372 							if(dst.w) pDst.w = r[dst.index].w;
373 						}
374 						else if(!dst.rel.dynamic)
375 						{
376 							Int a = dst.index + relativeAddress(dst.rel);
377 
378 							if(dst.x) pDst.x = r[a].x;
379 							if(dst.y) pDst.y = r[a].y;
380 							if(dst.z) pDst.z = r[a].z;
381 							if(dst.w) pDst.w = r[a].w;
382 						}
383 						else
384 						{
385 							Int4 a = dst.index + dynamicAddress(dst.rel);
386 
387 							if(dst.x) pDst.x = r[a].x;
388 							if(dst.y) pDst.y = r[a].y;
389 							if(dst.z) pDst.z = r[a].z;
390 							if(dst.w) pDst.w = r[a].w;
391 						}
392 						break;
393 					case Shader::PARAMETER_COLOROUT:
394 						if(dst.rel.type == Shader::PARAMETER_VOID)
395 						{
396 							if(dst.x) pDst.x = oC[dst.index].x;
397 							if(dst.y) pDst.y = oC[dst.index].y;
398 							if(dst.z) pDst.z = oC[dst.index].z;
399 							if(dst.w) pDst.w = oC[dst.index].w;
400 						}
401 						else if(!dst.rel.dynamic)
402 						{
403 							Int a = dst.index + relativeAddress(dst.rel);
404 
405 							if(dst.x) pDst.x = oC[a].x;
406 							if(dst.y) pDst.y = oC[a].y;
407 							if(dst.z) pDst.z = oC[a].z;
408 							if(dst.w) pDst.w = oC[a].w;
409 						}
410 						else
411 						{
412 							Int4 a = dst.index + dynamicAddress(dst.rel);
413 
414 							if(dst.x) pDst.x = oC[a].x;
415 							if(dst.y) pDst.y = oC[a].y;
416 							if(dst.z) pDst.z = oC[a].z;
417 							if(dst.w) pDst.w = oC[a].w;
418 						}
419 						break;
420 					case Shader::PARAMETER_PREDICATE:
421 						if(dst.x) pDst.x = p0.x;
422 						if(dst.y) pDst.y = p0.y;
423 						if(dst.z) pDst.z = p0.z;
424 						if(dst.w) pDst.w = p0.w;
425 						break;
426 					case Shader::PARAMETER_DEPTHOUT:
427 						pDst.x = oDepth;
428 						break;
429 					default:
430 						ASSERT(false);
431 					}
432 
433 					Int4 enable = enableMask(instruction);
434 
435 					Int4 xEnable = enable;
436 					Int4 yEnable = enable;
437 					Int4 zEnable = enable;
438 					Int4 wEnable = enable;
439 
440 					if(predicate)
441 					{
442 						unsigned char pSwizzle = instruction->predicateSwizzle;
443 
444 						Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
445 						Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
446 						Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
447 						Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
448 
449 						if(!instruction->predicateNot)
450 						{
451 							if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
452 							if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
453 							if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
454 							if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
455 						}
456 						else
457 						{
458 							if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
459 							if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
460 							if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
461 							if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
462 						}
463 					}
464 
465 					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
466 					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
467 					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
468 					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
469 
470 					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
471 					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
472 					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
473 					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
474 				}
475 
476 				switch(dst.type)
477 				{
478 				case Shader::PARAMETER_TEMP:
479 					if(dst.rel.type == Shader::PARAMETER_VOID)
480 					{
481 						if(dst.x) r[dst.index].x = d.x;
482 						if(dst.y) r[dst.index].y = d.y;
483 						if(dst.z) r[dst.index].z = d.z;
484 						if(dst.w) r[dst.index].w = d.w;
485 					}
486 					else if(!dst.rel.dynamic)
487 					{
488 						Int a = dst.index + relativeAddress(dst.rel);
489 
490 						if(dst.x) r[a].x = d.x;
491 						if(dst.y) r[a].y = d.y;
492 						if(dst.z) r[a].z = d.z;
493 						if(dst.w) r[a].w = d.w;
494 					}
495 					else
496 					{
497 						Int4 a = dst.index + dynamicAddress(dst.rel);
498 
499 						if(dst.x) r.scatter_x(a, d.x);
500 						if(dst.y) r.scatter_y(a, d.y);
501 						if(dst.z) r.scatter_z(a, d.z);
502 						if(dst.w) r.scatter_w(a, d.w);
503 					}
504 					break;
505 				case Shader::PARAMETER_COLOROUT:
506 					if(dst.rel.type == Shader::PARAMETER_VOID)
507 					{
508 						broadcastColor0 = (dst.index == 0) && broadcastColor0;
509 
510 						if(dst.x) oC[dst.index].x = d.x;
511 						if(dst.y) oC[dst.index].y = d.y;
512 						if(dst.z) oC[dst.index].z = d.z;
513 						if(dst.w) oC[dst.index].w = d.w;
514 					}
515 					else if(!dst.rel.dynamic)
516 					{
517 						broadcastColor0 = false;
518 						Int a = dst.index + relativeAddress(dst.rel);
519 
520 						if(dst.x) oC[a].x = d.x;
521 						if(dst.y) oC[a].y = d.y;
522 						if(dst.z) oC[a].z = d.z;
523 						if(dst.w) oC[a].w = d.w;
524 					}
525 					else
526 					{
527 						broadcastColor0 = false;
528 						Int4 a = dst.index + dynamicAddress(dst.rel);
529 
530 						if(dst.x) oC.scatter_x(a, d.x);
531 						if(dst.y) oC.scatter_y(a, d.y);
532 						if(dst.z) oC.scatter_z(a, d.z);
533 						if(dst.w) oC.scatter_w(a, d.w);
534 					}
535 					break;
536 				case Shader::PARAMETER_PREDICATE:
537 					if(dst.x) p0.x = d.x;
538 					if(dst.y) p0.y = d.y;
539 					if(dst.z) p0.z = d.z;
540 					if(dst.w) p0.w = d.w;
541 					break;
542 				case Shader::PARAMETER_DEPTHOUT:
543 					oDepth = d.x;
544 					break;
545 				default:
546 					ASSERT(false);
547 				}
548 			}
549 		}
550 
551 		if(currentLabel != -1)
552 		{
553 			Nucleus::setInsertBlock(returnBlock);
554 		}
555 
556 		if(broadcastColor0)
557 		{
558 			for(int i = 0; i < RENDERTARGETS; i++)
559 			{
560 				c[i] = oC[0];
561 			}
562 		}
563 		else
564 		{
565 			for(int i = 0; i < RENDERTARGETS; i++)
566 			{
567 				c[i] = oC[i];
568 			}
569 		}
570 
571 		clampColor(c);
572 
573 		if(state.depthOverride)
574 		{
575 			oDepth = Min(Max(oDepth, Float4(0.0f)), Float4(1.0f));
576 		}
577 	}
578 
alphaTest(Int cMask[4])579 	Bool PixelProgram::alphaTest(Int cMask[4])
580 	{
581 		if(!state.alphaTestActive())
582 		{
583 			return true;
584 		}
585 
586 		Int aMask;
587 
588 		if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
589 		{
590 			Short4 alpha = RoundShort4(c[0].w * Float4(0x1000));
591 
592 			PixelRoutine::alphaTest(aMask, alpha);
593 
594 			for(unsigned int q = 0; q < state.multiSample; q++)
595 			{
596 				cMask[q] &= aMask;
597 			}
598 		}
599 		else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
600 		{
601 			alphaToCoverage(cMask, c[0].w);
602 		}
603 		else ASSERT(false);
604 
605 		Int pass = cMask[0];
606 
607 		for(unsigned int q = 1; q < state.multiSample; q++)
608 		{
609 			pass = pass | cMask[q];
610 		}
611 
612 		return pass != 0x0;
613 	}
614 
	// Emits the code that writes the shader's output colors c[] to the render targets.
	// For every target with active color writes: optionally converts RGB to sRGB, fog blends
	// target 0, then for each multisample slice enabled in state.multiSampleMask performs
	// alpha blending (plus the logic operation for the fixed-point formats) and the masked write.
	//   fog     - fog value handed to fogBlend(); only target 0 is fog blended
	//   cBuffer - per-target color buffer pointers; slice q starts q * colorSliceB[index] bytes further
	//   x       - forwarded unchanged to alphaBlend/logicOperation/writeColor
	//   sMask, zMask, cMask - per-sample masks forwarded to writeColor
	void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
	{
		for(int index = 0; index < RENDERTARGETS; index++)
		{
			if(!state.colorWriteActive(index))
			{
				continue;   // Nothing gets written to this target
			}

			// Alpha (w) is left untouched by the sRGB conversion
			if(!postBlendSRGB && state.writeSRGB && !isSRGB(index))
			{
				c[index].x = linearToSRGB(c[index].x);
				c[index].y = linearToSRGB(c[index].y);
				c[index].z = linearToSRGB(c[index].z);
			}

			if(index == 0)
			{
				fogBlend(c[index], fog);
			}

			switch(state.targetFormat[index])
			{
			// Formats written through a 16-bit-per-component Vector4s color
			case FORMAT_R5G6B5:
			case FORMAT_X8R8G8B8:
			case FORMAT_X8B8G8R8:
			case FORMAT_A8R8G8B8:
			case FORMAT_A8B8G8R8:
			case FORMAT_SRGB8_X8:
			case FORMAT_SRGB8_A8:
			case FORMAT_G8R8:
			case FORMAT_R8:
			case FORMAT_A8:
			case FORMAT_G16R16:
			case FORMAT_A16B16G16R16:
				for(unsigned int q = 0; q < state.multiSample; q++)
				{
					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
					Vector4s color;   // Converted anew for each sample; passed to alphaBlend/logicOperation below

					if(state.targetFormat[index] == FORMAT_R5G6B5)
					{
						// NOTE(review): the 0xFBFF/0xFDFF scale factors presumably account for the
						// 5- and 6-bit channel precision - confirm
						color.x = UShort4(c[index].x * Float4(0xFBFF), false);
						color.y = UShort4(c[index].y * Float4(0xFDFF), false);
						color.z = UShort4(c[index].z * Float4(0xFBFF), false);
						color.w = UShort4(c[index].w * Float4(0xFFFF), false);
					}
					else
					{
						color.x = convertFixed16(c[index].x, false);
						color.y = convertFixed16(c[index].y, false);
						color.z = convertFixed16(c[index].z, false);
						color.w = convertFixed16(c[index].w, false);
					}

					if(state.multiSampleMask & (1 << q))   // Sample q enabled
					{
						alphaBlend(index, buffer, color, x);
						logicOperation(index, buffer, color, x);
						writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
					}
				}
				break;
			// Formats written from the floating-point Vector4f color; no logic operation is applied
			case FORMAT_R32F:
			case FORMAT_G32R32F:
			case FORMAT_X32B32G32R32F:
			case FORMAT_A32B32G32R32F:
			case FORMAT_X32B32G32R32F_UNSIGNED:
			case FORMAT_R32I:
			case FORMAT_G32R32I:
			case FORMAT_A32B32G32R32I:
			case FORMAT_R32UI:
			case FORMAT_G32R32UI:
			case FORMAT_A32B32G32R32UI:
			case FORMAT_R16I:
			case FORMAT_G16R16I:
			case FORMAT_A16B16G16R16I:
			case FORMAT_R16UI:
			case FORMAT_G16R16UI:
			case FORMAT_A16B16G16R16UI:
			case FORMAT_R8I:
			case FORMAT_G8R8I:
			case FORMAT_A8B8G8R8I:
			case FORMAT_R8UI:
			case FORMAT_G8R8UI:
			case FORMAT_A8B8G8R8UI:
				for(unsigned int q = 0; q < state.multiSample; q++)
				{
					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
					Vector4f color = c[index];   // Fresh copy per sample; passed to alphaBlend below

					if(state.multiSampleMask & (1 << q))   // Sample q enabled
					{
						alphaBlend(index, buffer, color, x);
						writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
					}
				}
				break;
			default:
				ASSERT(false);   // Unhandled render target format
			}
		}
	}
718 
sampleTexture(const Src & sampler,Vector4f & uvwq,Float4 & bias,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)719 	Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
720 	{
721 		Vector4f tmp;
722 
723 		if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
724 		{
725 			tmp = sampleTexture(sampler.index, uvwq, bias, dsx, dsy, offset, function);
726 		}
727 		else
728 		{
729 			Int index = As<Int>(Float(fetchRegister(sampler).x.x));
730 
731 			for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
732 			{
733 				if(shader->usesSampler(i))
734 				{
735 					If(index == i)
736 					{
737 						tmp = sampleTexture(i, uvwq, bias, dsx, dsy, offset, function);
738 						// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
739 					}
740 				}
741 			}
742 		}
743 
744 		Vector4f c;
745 		c.x = tmp[(sampler.swizzle >> 0) & 0x3];
746 		c.y = tmp[(sampler.swizzle >> 2) & 0x3];
747 		c.z = tmp[(sampler.swizzle >> 4) & 0x3];
748 		c.w = tmp[(sampler.swizzle >> 6) & 0x3];
749 
750 		return c;
751 	}
752 
sampleTexture(int samplerIndex,Vector4f & uvwq,Float4 & bias,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)753 	Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
754 	{
755 		#if PERF_PROFILE
756 			Long texTime = Ticks();
757 		#endif
758 
759 		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture);
760 		Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, bias, dsx, dsy, offset, function);
761 
762 		#if PERF_PROFILE
763 			cycles[PERF_TEX] += Ticks() - texTime;
764 		#endif
765 
766 		return c;
767 	}
768 
clampColor(Vector4f oC[RENDERTARGETS])769 	void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
770 	{
771 		for(int index = 0; index < RENDERTARGETS; index++)
772 		{
773 			if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
774 			{
775 				continue;
776 			}
777 
778 			switch(state.targetFormat[index])
779 			{
780 			case FORMAT_NULL:
781 				break;
782 			case FORMAT_R5G6B5:
783 			case FORMAT_A8R8G8B8:
784 			case FORMAT_A8B8G8R8:
785 			case FORMAT_X8R8G8B8:
786 			case FORMAT_X8B8G8R8:
787 			case FORMAT_SRGB8_X8:
788 			case FORMAT_SRGB8_A8:
789 			case FORMAT_G8R8:
790 			case FORMAT_R8:
791 			case FORMAT_A8:
792 			case FORMAT_G16R16:
793 			case FORMAT_A16B16G16R16:
794 				oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
795 				oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
796 				oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
797 				oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
798 				break;
799 			case FORMAT_R32F:
800 			case FORMAT_G32R32F:
801 			case FORMAT_X32B32G32R32F:
802 			case FORMAT_A32B32G32R32F:
803 			case FORMAT_R32I:
804 			case FORMAT_G32R32I:
805 			case FORMAT_A32B32G32R32I:
806 			case FORMAT_R32UI:
807 			case FORMAT_G32R32UI:
808 			case FORMAT_A32B32G32R32UI:
809 			case FORMAT_R16I:
810 			case FORMAT_G16R16I:
811 			case FORMAT_A16B16G16R16I:
812 			case FORMAT_R16UI:
813 			case FORMAT_G16R16UI:
814 			case FORMAT_A16B16G16R16UI:
815 			case FORMAT_R8I:
816 			case FORMAT_G8R8I:
817 			case FORMAT_A8B8G8R8I:
818 			case FORMAT_R8UI:
819 			case FORMAT_G8R8UI:
820 			case FORMAT_A8B8G8R8UI:
821 				break;
822 			case FORMAT_X32B32G32R32F_UNSIGNED:
823 				oC[index].x = Max(oC[index].x, Float4(0.0f));
824 				oC[index].y = Max(oC[index].y, Float4(0.0f));
825 				oC[index].z = Max(oC[index].z, Float4(0.0f));
826 				oC[index].w = Max(oC[index].w, Float4(0.0f));
827 				break;
828 			default:
829 				ASSERT(false);
830 			}
831 		}
832 	}
833 
enableMask(const Shader::Instruction * instruction)834 	Int4 PixelProgram::enableMask(const Shader::Instruction *instruction)
835 	{
836 		if(scalar)
837 		{
838 			return Int4(0xFFFFFFFF);
839 		}
840 
841 		Int4 enable = instruction->analysisBranch ? Int4(enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]) : Int4(0xFFFFFFFF);
842 
843 		if(shader->containsBreakInstruction() && instruction->analysisBreak)
844 		{
845 			enable &= enableBreak;
846 		}
847 
848 		if(shader->containsContinueInstruction() && instruction->analysisContinue)
849 		{
850 			enable &= enableContinue;
851 		}
852 
853 		if(shader->containsLeaveInstruction() && instruction->analysisLeave)
854 		{
855 			enable &= enableLeave;
856 		}
857 
858 		return enable;
859 	}
860 
	// Reads the source operand src (register index src.index + offset) and returns it with
	// the operand's swizzle and modifier applied.
	// Sampler operands return the sampler index as raw integer bits in reg.x, and the operand
	// types marked "Dummy" return an unassigned register; both return before the swizzle and
	// modifier stage.
	Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset)
	{
		Vector4f reg;
		unsigned int i = src.index + offset;

		switch(src.type)
		{
		case Shader::PARAMETER_TEMP:
			if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
			{
				reg = r[i];
			}
			else if(!src.rel.dynamic)   // Relative, single address
			{
				reg = r[i + relativeAddress(src.rel, src.bufferIndex)];
			}
			else   // Relative, four-lane address
			{
				reg = r[i + dynamicAddress(src.rel)];
			}
			break;
		case Shader::PARAMETER_INPUT:
			if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
			{
				reg = v[i];
			}
			else if(!src.rel.dynamic)
			{
				reg = v[i + relativeAddress(src.rel, src.bufferIndex)];
			}
			else
			{
				reg = v[i + dynamicAddress(src.rel)];
			}
			break;
		case Shader::PARAMETER_CONST:
			reg = readConstant(src, offset);
			break;
		case Shader::PARAMETER_TEXTURE:
			reg = v[2 + i];   // Texture registers are read from v[] starting at index 2
			break;
		case Shader::PARAMETER_MISCTYPE:
			// Any other index leaves reg unassigned
			if(src.index == Shader::VPosIndex) reg = vPos;
			if(src.index == Shader::VFaceIndex) reg = vFace;
			break;
		case Shader::PARAMETER_SAMPLER:
			// Only reg.x is written: the sampler index, stored as integer bits
			if(src.rel.type == Shader::PARAMETER_VOID)
			{
				reg.x = As<Float4>(Int4(i));
			}
			else if(src.rel.type == Shader::PARAMETER_TEMP)
			{
				reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
			}
			return reg;
		case Shader::PARAMETER_PREDICATE:   return reg; // Dummy
		case Shader::PARAMETER_VOID:        return reg; // Dummy
		case Shader::PARAMETER_FLOAT4LITERAL:
			// Each literal component is broadcast to all lanes
			reg.x = Float4(src.value[0]);
			reg.y = Float4(src.value[1]);
			reg.z = Float4(src.value[2]);
			reg.w = Float4(src.value[3]);
			break;
		case Shader::PARAMETER_CONSTINT:    return reg; // Dummy
		case Shader::PARAMETER_CONSTBOOL:   return reg; // Dummy
		case Shader::PARAMETER_LOOP:        return reg; // Dummy
		case Shader::PARAMETER_COLOROUT:
			if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
			{
				reg = oC[i];
			}
			else if(!src.rel.dynamic)
			{
				reg = oC[i + relativeAddress(src.rel, src.bufferIndex)];
			}
			else
			{
				reg = oC[i + dynamicAddress(src.rel)];
			}
			break;
		case Shader::PARAMETER_DEPTHOUT:
			reg.x = oDepth;   // Only the x component is assigned
			break;
		default:
			ASSERT(false);
		}

		// Swizzle: two bits per destination component select the source component
		const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
		const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
		const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
		const Float4 &w = reg[(src.swizzle >> 6) & 0x3];

		Vector4f mod;

		// Source modifier, applied to the swizzled components
		switch(src.modifier)
		{
		case Shader::MODIFIER_NONE:
			mod.x = x;
			mod.y = y;
			mod.z = z;
			mod.w = w;
			break;
		case Shader::MODIFIER_NEGATE:
			mod.x = -x;
			mod.y = -y;
			mod.z = -z;
			mod.w = -w;
			break;
		case Shader::MODIFIER_ABS:
			mod.x = Abs(x);
			mod.y = Abs(y);
			mod.z = Abs(z);
			mod.w = Abs(w);
			break;
		case Shader::MODIFIER_ABS_NEGATE:
			mod.x = -Abs(x);
			mod.y = -Abs(y);
			mod.z = -Abs(z);
			mod.w = -Abs(w);
			break;
		case Shader::MODIFIER_NOT:
			// Bitwise complement of the raw bits
			mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
			mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
			mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
			mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
			break;
		default:
			ASSERT(false);
		}

		return mod;
	}
993 
uniformAddress(int bufferIndex,unsigned int index)994 	RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index)
995 	{
996 		if(bufferIndex == -1)
997 		{
998 			return data + OFFSET(DrawData, ps.c[index]);
999 		}
1000 		else
1001 		{
1002 			return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index;
1003 		}
1004 	}
1005 
uniformAddress(int bufferIndex,unsigned int index,Int & offset)1006 	RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
1007 	{
1008 		return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
1009 	}
1010 
	// Reads the constant register addressed by src (index src.index + offset).
	// Three addressing modes are handled: absolute, relative with a single address shared by
	// all lanes, and dynamic where each of the four lanes computes its own register index.
	// In the returned vector each component holds that component's value for all four lanes.
	Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset)
	{
		Vector4f c;
		unsigned int i = src.index + offset;

		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
		{
			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));

			// Broadcast each component of the loaded float4 across all lanes
			c.x = c.x.xxxx;
			c.y = c.y.yyyy;
			c.z = c.z.zzzz;
			c.w = c.w.wwww;

			if(shader->containsDefineInstruction())   // Constant may be known at compile time
			{
				// The first DEF instruction targeting register i overrides the loaded value with its literal
				for(size_t j = 0; j < shader->getLength(); j++)
				{
					const Shader::Instruction &instruction = *shader->getInstruction(j);

					if(instruction.opcode == Shader::OPCODE_DEF)
					{
						if(instruction.dst.index == i)
						{
							c.x = Float4(instruction.src[0].value[0]);
							c.y = Float4(instruction.src[0].value[1]);
							c.z = Float4(instruction.src[0].value[2]);
							c.w = Float4(instruction.src[0].value[3]);

							break;
						}
					}
				}
			}
		}
		else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP)
		{
			// One relative address for all lanes
			Int a = relativeAddress(src.rel, src.bufferIndex);

			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));

			c.x = c.x.xxxx;
			c.y = c.y.yyyy;
			c.z = c.z.zzzz;
			c.w = c.w.wwww;
		}
		else
		{
			// Dynamic: the address register may hold a different value in each lane
			int component = src.rel.swizzle & 0x03;
			Float4 a;

			switch(src.rel.type)
			{
			case Shader::PARAMETER_TEMP:     a = r[src.rel.index][component]; break;
			case Shader::PARAMETER_INPUT:    a = v[src.rel.index][component]; break;
			case Shader::PARAMETER_OUTPUT:   a = oC[src.rel.index][component]; break;
			case Shader::PARAMETER_CONST:    a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
			case Shader::PARAMETER_MISCTYPE:
				switch(src.rel.index)
				{
				case Shader::VPosIndex:  a = vPos.x;  break;
				case Shader::VFaceIndex: a = vFace.x; break;
				default: ASSERT(false);
				}
				break;
			default: ASSERT(false);
			}

			// The address bits are reinterpreted as integers, scaled and added to the base index
			Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);

			// NOTE(review): this is the pixel program, yet the clamp uses VERTEX_UNIFORM_VECTORS; confirm that
			// ps.c really has a zeroed entry at that index, and that the clamp is meaningful when
			// src.bufferIndex selects a uniform buffer.
			index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS));   // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}

			Int index0 = Extract(index, 0);
			Int index1 = Extract(index, 1);
			Int index2 = Extract(index, 2);
			Int index3 = Extract(index, 3);

			// One float4 load per lane...
			c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
			c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
			c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
			c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);

			// ...then transposed so that c.x holds the x components of all four lanes, and so on
			transpose4x4(c.x, c.y, c.z, c.w);
		}

		return c;
	}
1098 
relativeAddress(const Shader::Relative & rel,int bufferIndex)1099 	Int PixelProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex)
1100 	{
1101 		ASSERT(!rel.dynamic);
1102 
1103 		if(rel.type == Shader::PARAMETER_TEMP)
1104 		{
1105 			return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale;
1106 		}
1107 		else if(rel.type == Shader::PARAMETER_INPUT)
1108 		{
1109 			return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale;
1110 		}
1111 		else if(rel.type == Shader::PARAMETER_OUTPUT)
1112 		{
1113 			return As<Int>(Extract(oC[rel.index].x, 0)) * rel.scale;
1114 		}
1115 		else if(rel.type == Shader::PARAMETER_CONST)
1116 		{
1117 			return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale;
1118 		}
1119 		else if(rel.type == Shader::PARAMETER_LOOP)
1120 		{
1121 			return aL[loopDepth];
1122 		}
1123 		else ASSERT(false);
1124 
1125 		return 0;
1126 	}
1127 
dynamicAddress(const Shader::Relative & rel)1128 	Int4 PixelProgram::dynamicAddress(const Shader::Relative &rel)
1129 	{
1130 		int component = rel.swizzle & 0x03;
1131 		Float4 a;
1132 
1133 		switch(rel.type)
1134 		{
1135 		case Shader::PARAMETER_TEMP:     a = r[rel.index][component]; break;
1136 		case Shader::PARAMETER_INPUT:    a = v[rel.index][component]; break;
1137 		case Shader::PARAMETER_OUTPUT:   a = oC[rel.index][component]; break;
1138 		case Shader::PARAMETER_MISCTYPE:
1139 			switch(rel.index)
1140 			{
1141 			case Shader::VPosIndex:  a = vPos.x;  break;
1142 			case Shader::VFaceIndex: a = vFace.x; break;
1143 			default: ASSERT(false);
1144 			}
1145 			break;
1146 		default: ASSERT(false);
1147 		}
1148 
1149 		return As<Int4>(a) * Int4(rel.scale);
1150 	}
1151 
linearToSRGB(const Float4 & x)1152 	Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
1153 	{
1154 		Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
1155 		Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
1156 
1157 		return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
1158 	}
1159 
M3X2(Vector4f & dst,Vector4f & src0,const Src & src1)1160 	void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1)
1161 	{
1162 		Vector4f row0 = fetchRegister(src1, 0);
1163 		Vector4f row1 = fetchRegister(src1, 1);
1164 
1165 		dst.x = dot3(src0, row0);
1166 		dst.y = dot3(src0, row1);
1167 	}
1168 
M3X3(Vector4f & dst,Vector4f & src0,const Src & src1)1169 	void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1170 	{
1171 		Vector4f row0 = fetchRegister(src1, 0);
1172 		Vector4f row1 = fetchRegister(src1, 1);
1173 		Vector4f row2 = fetchRegister(src1, 2);
1174 
1175 		dst.x = dot3(src0, row0);
1176 		dst.y = dot3(src0, row1);
1177 		dst.z = dot3(src0, row2);
1178 	}
1179 
M3X4(Vector4f & dst,Vector4f & src0,const Src & src1)1180 	void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1181 	{
1182 		Vector4f row0 = fetchRegister(src1, 0);
1183 		Vector4f row1 = fetchRegister(src1, 1);
1184 		Vector4f row2 = fetchRegister(src1, 2);
1185 		Vector4f row3 = fetchRegister(src1, 3);
1186 
1187 		dst.x = dot3(src0, row0);
1188 		dst.y = dot3(src0, row1);
1189 		dst.z = dot3(src0, row2);
1190 		dst.w = dot3(src0, row3);
1191 	}
1192 
M4X3(Vector4f & dst,Vector4f & src0,const Src & src1)1193 	void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1194 	{
1195 		Vector4f row0 = fetchRegister(src1, 0);
1196 		Vector4f row1 = fetchRegister(src1, 1);
1197 		Vector4f row2 = fetchRegister(src1, 2);
1198 
1199 		dst.x = dot4(src0, row0);
1200 		dst.y = dot4(src0, row1);
1201 		dst.z = dot4(src0, row2);
1202 	}
1203 
M4X4(Vector4f & dst,Vector4f & src0,const Src & src1)1204 	void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1205 	{
1206 		Vector4f row0 = fetchRegister(src1, 0);
1207 		Vector4f row1 = fetchRegister(src1, 1);
1208 		Vector4f row2 = fetchRegister(src1, 2);
1209 		Vector4f row3 = fetchRegister(src1, 3);
1210 
1211 		dst.x = dot4(src0, row0);
1212 		dst.y = dot4(src0, row1);
1213 		dst.z = dot4(src0, row2);
1214 		dst.w = dot4(src0, row3);
1215 	}
1216 
TEX(Vector4f & dst,Vector4f & src0,const Src & src1,bool project,bool bias)1217 	void PixelProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
1218 	{
1219 		if(project)
1220 		{
1221 			Vector4f proj;
1222 			Float4 rw = reciprocal(src0.w);
1223 			proj.x = src0.x * rw;
1224 			proj.y = src0.y * rw;
1225 			proj.z = src0.z * rw;
1226 
1227 			dst = sampleTexture(src1, proj, src0.x, (src0), (src0), (src0), Implicit);
1228 		}
1229 		else
1230 		{
1231 			dst = sampleTexture(src1, src0, src0.x, (src0), (src0), (src0), bias ? Bias : Implicit);
1232 		}
1233 	}
1234 
TEXOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset)1235 	void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset)
1236 	{
1237 		dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Implicit, Offset});
1238 	}
1239 
TEXLODOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & lod)1240 	void PixelProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod)
1241 	{
1242 		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset});
1243 	}
1244 
TEXBIAS(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & bias)1245 	void PixelProgram::TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias)
1246 	{
1247 		dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias);
1248 	}
1249 
TEXOFFSETBIAS(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & bias)1250 	void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias)
1251 	{
1252 		dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset});
1253 	}
1254 
TEXELFETCH(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & lod)1255 	void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
1256 	{
1257 		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch);
1258 	}
1259 
TEXELFETCHOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & lod)1260 	void PixelProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
1261 	{
1262 		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset});
1263 	}
1264 
TEXGRAD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & dsx,Vector4f & dsy)1265 	void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy)
1266 	{
1267 		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, (src0), Grad);
1268 	}
1269 
TEXGRADOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & dsx,Vector4f & dsy,Vector4f & offset)1270 	void PixelProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset)
1271 	{
1272 		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset});
1273 	}
1274 
TEXLOD(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & lod)1275 	void PixelProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod)
1276 	{
1277 		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod);
1278 	}
1279 
TEXSIZE(Vector4f & dst,Float4 & lod,const Src & src1)1280 	void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1281 	{
1282 		bool uniformSampler = (src1.type == Shader::PARAMETER_SAMPLER && src1.rel.type == Shader::PARAMETER_VOID);
1283 		Int offset = uniformSampler ? src1.index * sizeof(Texture) : As<Int>(Float(fetchRegister(src1).x.x)) * sizeof(Texture);
1284 		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + offset;
1285 
1286 		dst = SamplerCore::textureSize(texture, lod);
1287 	}
1288 
TEXKILL(Int cMask[4],Vector4f & src,unsigned char mask)1289 	void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
1290 	{
1291 		Int kill = -1;
1292 
1293 		if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
1294 		if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
1295 		if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
1296 		if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
1297 
1298 		// FIXME: Dynamic branching affects TEXKILL?
1299 		//	if(shader->containsDynamicBranching())
1300 		//	{
1301 		//		kill = ~SignMask(enableMask());
1302 		//	}
1303 
1304 		for(unsigned int q = 0; q < state.multiSample; q++)
1305 		{
1306 			cMask[q] &= kill;
1307 		}
1308 
1309 		// FIXME: Branch to end of shader if all killed?
1310 	}
1311 
DISCARD(Int cMask[4],const Shader::Instruction * instruction)1312 	void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction)
1313 	{
1314 		Int kill = 0;
1315 
1316 		if(shader->containsDynamicBranching())
1317 		{
1318 			kill = ~SignMask(enableMask(instruction));
1319 		}
1320 
1321 		for(unsigned int q = 0; q < state.multiSample; q++)
1322 		{
1323 			cMask[q] &= kill;
1324 		}
1325 
1326 		// FIXME: Branch to end of shader if all killed?
1327 	}
1328 
DFDX(Vector4f & dst,Vector4f & src)1329 	void PixelProgram::DFDX(Vector4f &dst, Vector4f &src)
1330 	{
1331 		dst.x = src.x.yyww - src.x.xxzz;
1332 		dst.y = src.y.yyww - src.y.xxzz;
1333 		dst.z = src.z.yyww - src.z.xxzz;
1334 		dst.w = src.w.yyww - src.w.xxzz;
1335 	}
1336 
DFDY(Vector4f & dst,Vector4f & src)1337 	void PixelProgram::DFDY(Vector4f &dst, Vector4f &src)
1338 	{
1339 		dst.x = src.x.zwzw - src.x.xyxy;
1340 		dst.y = src.y.zwzw - src.y.xyxy;
1341 		dst.z = src.z.zwzw - src.z.xyxy;
1342 		dst.w = src.w.zwzw - src.w.xyxy;
1343 	}
1344 
FWIDTH(Vector4f & dst,Vector4f & src)1345 	void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src)
1346 	{
1347 		// abs(dFdx(src)) + abs(dFdy(src));
1348 		dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
1349 		dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
1350 		dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
1351 		dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
1352 	}
1353 
BREAK()1354 	void PixelProgram::BREAK()
1355 	{
1356 		enableBreak = enableBreak & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1357 	}
1358 
BREAKC(Vector4f & src0,Vector4f & src1,Control control)1359 	void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1360 	{
1361 		Int4 condition;
1362 
1363 		switch(control)
1364 		{
1365 		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1366 		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1367 		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1368 		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1369 		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1370 		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1371 		default:
1372 			ASSERT(false);
1373 		}
1374 
1375 		BREAK(condition);
1376 	}
1377 
BREAKP(const Src & predicateRegister)1378 	void PixelProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1379 	{
1380 		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1381 
1382 		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1383 		{
1384 			condition = ~condition;
1385 		}
1386 
1387 		BREAK(condition);
1388 	}
1389 
BREAK(Int4 & condition)1390 	void PixelProgram::BREAK(Int4 &condition)
1391 	{
1392 		condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1393 
1394 		enableBreak = enableBreak & ~condition;
1395 	}
1396 
CONTINUE()1397 	void PixelProgram::CONTINUE()
1398 	{
1399 		enableContinue = enableContinue & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1400 	}
1401 
	// Restores enableContinue from the most recent entry of restoreContinue and removes that entry.
	void PixelProgram::TEST()
	{
		enableContinue = restoreContinue.back();
		restoreContinue.pop_back();
	}
1407 
	// Switches to scalar mode: while `scalar` is set, enableMask() reports all lanes enabled.
	// ENDWHILE() clears the flag again.
	void PixelProgram::SCALAR()
	{
		scalar = true;
	}
1412 
	// Emits an unconditional call to the subroutine at labelIndex. Code generation continues
	// in the return block associated with callSiteIndex.
	void PixelProgram::CALL(int labelIndex, int callSiteIndex)
	{
		if(!labelBlock[labelIndex])   // Create the subroutine's block on first reference
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		// With several return blocks for this label, the call site is pushed on the call stack
		// (presumably so the return code can select the right block - confirm against RET)
		if(callRetBlock[labelIndex].size() > 1)
		{
			callStack[Min(stackIndex++, Int(MAX_SHADER_CALL_STACK_SIZE))] = UInt(callSiteIndex);
		}

		Int4 restoreLeave = enableLeave;   // Saved before the call

		Nucleus::createBr(labelBlock[labelIndex]);
		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

		enableLeave = restoreLeave;   // Restored in the return block
	}
1432 
CALLNZ(int labelIndex,int callSiteIndex,const Src & src)1433 	void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
1434 	{
1435 		if(src.type == Shader::PARAMETER_CONSTBOOL)
1436 		{
1437 			CALLNZb(labelIndex, callSiteIndex, src);
1438 		}
1439 		else if(src.type == Shader::PARAMETER_PREDICATE)
1440 		{
1441 			CALLNZp(labelIndex, callSiteIndex, src);
1442 		}
1443 		else ASSERT(false);
1444 	}
1445 
	// Emits a call to the subroutine at labelIndex that is taken when the constant boolean
	// register is non-zero (or zero, with the NOT modifier). The condition is a single Bool,
	// so either all lanes make the call or none do.
	void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
	{
		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME

		if(boolRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = !condition;
		}

		if(!labelBlock[labelIndex])   // Create the subroutine's block on first reference
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		// With several return blocks for this label, the call site is pushed on the call stack.
		// Note that this happens before the branch, so also when the call is not taken.
		if(callRetBlock[labelIndex].size() > 1)
		{
			callStack[Min(stackIndex++, Int(MAX_SHADER_CALL_STACK_SIZE))] = UInt(callSiteIndex);
		}

		Int4 restoreLeave = enableLeave;   // Saved before the call

		// Taken: enter the subroutine. Not taken: go straight to the return block.
		branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

		enableLeave = restoreLeave;   // Restored in the return block
	}
1472 
	// Emits a predicated call to the subroutine at labelIndex. The per-lane condition comes from
	// the selected component of p0 (inverted with the NOT modifier), limited to the currently
	// enabled lanes. It becomes the enable mask for the duration of the call, and the
	// subroutine is only entered when at least one lane is set.
	void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
	{
		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);

		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = ~condition;
		}

		condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];

		if(!labelBlock[labelIndex])   // Create the subroutine's block on first reference
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		// With several return blocks for this label, the call site is pushed on the call stack.
		// Note that this happens before the branch, so also when the call is not taken.
		if(callRetBlock[labelIndex].size() > 1)
		{
			callStack[Min(stackIndex++, Int(MAX_SHADER_CALL_STACK_SIZE))] = UInt(callSiteIndex);
		}

		// Push the condition as the new top of the enable stack
		enableIndex++;
		enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition;
		Int4 restoreLeave = enableLeave;   // Saved before the call

		Bool notAllFalse = SignMask(condition) != 0;   // At least one lane makes the call
		branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

		// Back in the return block: pop the enable stack and restore the leave mask
		enableIndex--;
		enableLeave = restoreLeave;
	}
1505 
	// Starts the 'else' part of the innermost open 'if'. ifDepth is lowered to address that
	// if's bookkeeping entries and raised again at the end, so the nesting depth is unchanged.
	void PixelProgram::ELSE()
	{
		ifDepth--;

		BasicBlock *falseBlock = ifFalseBlock[ifDepth];
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		if(isConditionalIf[ifDepth])
		{
			// Lanes enabled in the enclosing scope but not in the 'if' part
			Int4 condition = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
			Bool notAllFalse = SignMask(condition) != 0;

			// Skip the 'else' part entirely when no lane needs it
			branch(notAllFalse, falseBlock, endBlock);

			// The same lanes become the enable mask of the 'else' part.
			// NOTE(review): no setInsertBlock here, so branch() presumably continues emission in falseBlock - confirm
			enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
		}
		else
		{
			// End of the 'if' part jumps over the 'else' part, which is emitted into falseBlock
			Nucleus::createBr(endBlock);
			Nucleus::setInsertBlock(falseBlock);
		}

		ifFalseBlock[ifDepth] = endBlock;   // ENDIF() closes the construct on this block

		ifDepth++;
	}
1532 
ENDIF()1533 	void PixelProgram::ENDIF()
1534 	{
1535 		ifDepth--;
1536 
1537 		BasicBlock *endBlock = ifFalseBlock[ifDepth];
1538 
1539 		Nucleus::createBr(endBlock);
1540 		Nucleus::setInsertBlock(endBlock);
1541 
1542 		if(isConditionalIf[ifDepth])
1543 		{
1544 			enableIndex--;
1545 		}
1546 	}
1547 
ENDLOOP()1548 	void PixelProgram::ENDLOOP()
1549 	{
1550 		loopRepDepth--;
1551 
1552 		aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1553 
1554 		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1555 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1556 
1557 		Nucleus::createBr(testBlock);
1558 		Nucleus::setInsertBlock(endBlock);
1559 
1560 		loopDepth--;
1561 		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1562 	}
1563 
ENDREP()1564 	void PixelProgram::ENDREP()
1565 	{
1566 		loopRepDepth--;
1567 
1568 		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1569 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1570 
1571 		Nucleus::createBr(testBlock);
1572 		Nucleus::setInsertBlock(endBlock);
1573 
1574 		loopDepth--;
1575 		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1576 	}
1577 
ENDWHILE()1578 	void PixelProgram::ENDWHILE()
1579 	{
1580 		loopRepDepth--;
1581 
1582 		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1583 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1584 
1585 		Nucleus::createBr(testBlock);
1586 		Nucleus::setInsertBlock(endBlock);
1587 
1588 		enableIndex--;
1589 		scalar = false;
1590 	}
1591 
ENDSWITCH()1592 	void PixelProgram::ENDSWITCH()
1593 	{
1594 		loopRepDepth--;
1595 
1596 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1597 
1598 		Nucleus::createBr(endBlock);
1599 		Nucleus::setInsertBlock(endBlock);
1600 	}
1601 
IF(const Src & src)1602 	void PixelProgram::IF(const Src &src)
1603 	{
1604 		if(src.type == Shader::PARAMETER_CONSTBOOL)
1605 		{
1606 			IFb(src);
1607 		}
1608 		else if(src.type == Shader::PARAMETER_PREDICATE)
1609 		{
1610 			IFp(src);
1611 		}
1612 		else
1613 		{
1614 			Int4 condition = As<Int4>(fetchRegister(src).x);
1615 			IF(condition);
1616 		}
1617 	}
1618 
IFb(const Src & boolRegister)1619 	void PixelProgram::IFb(const Src &boolRegister)
1620 	{
1621 		ASSERT(ifDepth < 24 + 4);
1622 
1623 		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1624 
1625 		if(boolRegister.modifier == Shader::MODIFIER_NOT)
1626 		{
1627 			condition = !condition;
1628 		}
1629 
1630 		BasicBlock *trueBlock = Nucleus::createBasicBlock();
1631 		BasicBlock *falseBlock = Nucleus::createBasicBlock();
1632 
1633 		branch(condition, trueBlock, falseBlock);
1634 
1635 		isConditionalIf[ifDepth] = false;
1636 		ifFalseBlock[ifDepth] = falseBlock;
1637 
1638 		ifDepth++;
1639 	}
1640 
IFp(const Src & predicateRegister)1641 	void PixelProgram::IFp(const Src &predicateRegister)
1642 	{
1643 		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1644 
1645 		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1646 		{
1647 			condition = ~condition;
1648 		}
1649 
1650 		IF(condition);
1651 	}
1652 
IFC(Vector4f & src0,Vector4f & src1,Control control)1653 	void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
1654 	{
1655 		Int4 condition;
1656 
1657 		switch(control)
1658 		{
1659 		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1660 		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1661 		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1662 		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1663 		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1664 		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1665 		default:
1666 			ASSERT(false);
1667 		}
1668 
1669 		IF(condition);
1670 	}
1671 
IF(Int4 & condition)1672 	void PixelProgram::IF(Int4 &condition)
1673 	{
1674 		condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1675 
1676 		enableIndex++;
1677 		enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition;
1678 
1679 		BasicBlock *trueBlock = Nucleus::createBasicBlock();
1680 		BasicBlock *falseBlock = Nucleus::createBasicBlock();
1681 
1682 		Bool notAllFalse = SignMask(condition) != 0;
1683 
1684 		branch(notAllFalse, trueBlock, falseBlock);
1685 
1686 		isConditionalIf[ifDepth] = true;
1687 		ifFalseBlock[ifDepth] = falseBlock;
1688 
1689 		ifDepth++;
1690 	}
1691 
LABEL(int labelIndex)1692 	void PixelProgram::LABEL(int labelIndex)
1693 	{
1694 		if(!labelBlock[labelIndex])
1695 		{
1696 			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1697 		}
1698 
1699 		Nucleus::setInsertBlock(labelBlock[labelIndex]);
1700 		currentLabel = labelIndex;
1701 	}
1702 
LOOP(const Src & integerRegister)1703 	void PixelProgram::LOOP(const Src &integerRegister)
1704 	{
1705 		loopDepth++;
1706 
1707 		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1708 		aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1]));
1709 		increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2]));
1710 
1711 		//	If(increment[loopDepth] == 0)
1712 		//	{
1713 		//		increment[loopDepth] = 1;
1714 		//	}
1715 
1716 		BasicBlock *loopBlock = Nucleus::createBasicBlock();
1717 		BasicBlock *testBlock = Nucleus::createBasicBlock();
1718 		BasicBlock *endBlock = Nucleus::createBasicBlock();
1719 
1720 		loopRepTestBlock[loopRepDepth] = testBlock;
1721 		loopRepEndBlock[loopRepDepth] = endBlock;
1722 
1723 		// FIXME: jump(testBlock)
1724 		Nucleus::createBr(testBlock);
1725 		Nucleus::setInsertBlock(testBlock);
1726 
1727 		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1728 		Nucleus::setInsertBlock(loopBlock);
1729 
1730 		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1731 
1732 		loopRepDepth++;
1733 	}
1734 
REP(const Src & integerRegister)1735 	void PixelProgram::REP(const Src &integerRegister)
1736 	{
1737 		loopDepth++;
1738 
1739 		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1740 		aL[loopDepth] = aL[loopDepth - 1];
1741 
1742 		BasicBlock *loopBlock = Nucleus::createBasicBlock();
1743 		BasicBlock *testBlock = Nucleus::createBasicBlock();
1744 		BasicBlock *endBlock = Nucleus::createBasicBlock();
1745 
1746 		loopRepTestBlock[loopRepDepth] = testBlock;
1747 		loopRepEndBlock[loopRepDepth] = endBlock;
1748 
1749 		// FIXME: jump(testBlock)
1750 		Nucleus::createBr(testBlock);
1751 		Nucleus::setInsertBlock(testBlock);
1752 
1753 		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1754 		Nucleus::setInsertBlock(loopBlock);
1755 
1756 		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1757 
1758 		loopRepDepth++;
1759 	}
1760 
	// Opens a WHILE construct whose condition is re-read from
	// 'temporaryRegister' each time the test block runs.
	//
	// Generated control flow: current block -> test block; the test block
	// computes the loop's Int4 condition and enters the loop block when
	// SignMask(condition) is non-zero, otherwise goes to the end block.
	// The test and end blocks are recorded at loopRepDepth so that ENDWHILE
	// can emit the back edge and continue after the loop.
	void PixelProgram::WHILE(const Src &temporaryRegister)
	{
		// New enable-stack level for the loop's condition; ENDWHILE decrements it again
		enableIndex++;

		BasicBlock *loopBlock = Nucleus::createBasicBlock();
		BasicBlock *testBlock = Nucleus::createBasicBlock();
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		loopRepTestBlock[loopRepDepth] = testBlock;
		loopRepEndBlock[loopRepDepth] = endBlock;

		// Capture the break mask before the loop; it is written back in endBlock below.
		// The continue mask is pushed for later restoration (the matching pop is not in this function).
		Int4 restoreBreak = enableBreak;
		restoreContinue.push_back(enableContinue);

		// TODO: jump(testBlock)
		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(testBlock);

		// Everything from here to the branch is emitted into testBlock, so it is
		// re-evaluated each time ENDWHILE's back edge returns here.
		const Vector4f &src = fetchRegister(temporaryRegister);
		Int4 condition = As<Int4>(src.x);
		// Combine with the enclosing level's mask (the level below the one reserved above)
		condition &= enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
		// The leave/break masks are only applied when the shader contains those instructions
		if(shader->containsLeaveInstruction()) condition &= enableLeave;
		if(shader->containsBreakInstruction()) condition &= enableBreak;
		// NOTE(review): the index is clamped to MAX_SHADER_ENABLE_STACK_SIZE itself,
		// not MAX_SHADER_ENABLE_STACK_SIZE - 1 - confirm enableStack has an element at that index.
		enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition;

		Bool notAllFalse = SignMask(condition) != 0;
		branch(notAllFalse, loopBlock, endBlock);

		// Temporarily switch to endBlock to write the saved break mask back there
		Nucleus::setInsertBlock(endBlock);
		enableBreak = restoreBreak;

		// Resume emission in the loop body
		Nucleus::setInsertBlock(loopBlock);

		loopRepDepth++;
		scalar = false;
	}
1797 
SWITCH()1798 	void PixelProgram::SWITCH()
1799 	{
1800 		BasicBlock *endBlock = Nucleus::createBasicBlock();
1801 
1802 		loopRepTestBlock[loopRepDepth] = nullptr;
1803 		loopRepEndBlock[loopRepDepth] = endBlock;
1804 
1805 		Int4 restoreBreak = enableBreak;
1806 
1807 		BasicBlock *currentBlock = Nucleus::getInsertBlock();
1808 
1809 		Nucleus::setInsertBlock(endBlock);
1810 		enableBreak = restoreBreak;
1811 
1812 		Nucleus::setInsertBlock(currentBlock);
1813 
1814 		loopRepDepth++;
1815 	}
1816 
RET()1817 	void PixelProgram::RET()
1818 	{
1819 		if(currentLabel == -1)
1820 		{
1821 			returnBlock = Nucleus::createBasicBlock();
1822 			Nucleus::createBr(returnBlock);
1823 		}
1824 		else
1825 		{
1826 			BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1827 
1828 			if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1829 			{
1830 				// FIXME: Encapsulate
1831 				UInt index = callStack[--stackIndex];
1832 
1833 				Value *value = index.loadValue();
1834 				SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1835 
1836 				for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1837 				{
1838 					Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
1839 				}
1840 			}
1841 			else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1842 			{
1843 				Nucleus::createBr(callRetBlock[currentLabel][0]);
1844 			}
1845 			else   // Function isn't called
1846 			{
1847 				Nucleus::createBr(unreachableBlock);
1848 			}
1849 
1850 			Nucleus::setInsertBlock(unreachableBlock);
1851 			Nucleus::createUnreachable();
1852 		}
1853 	}
1854 
LEAVE()1855 	void PixelProgram::LEAVE()
1856 	{
1857 		enableLeave = enableLeave & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1858 
1859 		// FIXME: Return from function if all instances left
1860 		// FIXME: Use enableLeave in other control-flow constructs
1861 	}
1862 }
1863