1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "VertexProgram.hpp"
16 
17 #include "VertexShader.hpp"
18 #include "SamplerCore.hpp"
19 #include "Renderer/Renderer.hpp"
20 #include "Renderer/Vertex.hpp"
21 #include "Common/Half.hpp"
22 #include "Common/Debug.hpp"
23 
24 namespace sw
25 {
VertexProgram(const VertexProcessor::State & state,const VertexShader * shader)26 	VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
27 		: VertexRoutine(state, shader), shader(shader), r(shader->indirectAddressableTemporaries)
28 	{
29 		for(int i = 0; i < MAX_SHADER_CALL_SITES; i++)
30 		{
31 			labelBlock[i] = 0;
32 		}
33 
34 		loopDepth = -1;
35 		enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
36 
37 		if(shader->containsBreakInstruction())
38 		{
39 			enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
40 		}
41 
42 		if(shader->containsContinueInstruction())
43 		{
44 			enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
45 		}
46 
47 		if(shader->isInstanceIdDeclared())
48 		{
49 			instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));
50 		}
51 	}
52 
~VertexProgram()53 	VertexProgram::~VertexProgram()
54 	{
55 	}
56 
pipeline(UInt & index)57 	void VertexProgram::pipeline(UInt &index)
58 	{
59 		if(!state.preTransformed)
60 		{
61 			program(index);
62 		}
63 		else
64 		{
65 			passThrough();
66 		}
67 	}
68 
program(UInt & index)69 	void VertexProgram::program(UInt &index)
70 	{
71 	//	shader->print("VertexShader-%0.8X.txt", state.shaderID);
72 
73 		unsigned short shaderModel = shader->getShaderModel();
74 
75 		enableIndex = 0;
76 		stackIndex = 0;
77 
78 		if(shader->containsLeaveInstruction())
79 		{
80 			enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
81 		}
82 
83 		if(shader->isVertexIdDeclared())
84 		{
85 			if(state.textureSampling)
86 			{
87 				vertexID = Int4(index);
88 			}
89 			else
90 			{
91 				vertexID = Insert(vertexID, As<Int>(index), 0);
92 				vertexID = Insert(vertexID, As<Int>(index + 1), 1);
93 				vertexID = Insert(vertexID, As<Int>(index + 2), 2);
94 				vertexID = Insert(vertexID, As<Int>(index + 3), 3);
95 			}
96 		}
97 
98 		// Create all call site return blocks up front
99 		for(size_t i = 0; i < shader->getLength(); i++)
100 		{
101 			const Shader::Instruction *instruction = shader->getInstruction(i);
102 			Shader::Opcode opcode = instruction->opcode;
103 
104 			if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
105 			{
106 				const Dst &dst = instruction->dst;
107 
108 				ASSERT(callRetBlock[dst.label].size() == dst.callSite);
109 				callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
110 			}
111 		}
112 
113 		for(size_t i = 0; i < shader->getLength(); i++)
114 		{
115 			const Shader::Instruction *instruction = shader->getInstruction(i);
116 			Shader::Opcode opcode = instruction->opcode;
117 
118 			if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
119 			{
120 				continue;
121 			}
122 
123 			Dst dst = instruction->dst;
124 			Src src0 = instruction->src[0];
125 			Src src1 = instruction->src[1];
126 			Src src2 = instruction->src[2];
127 			Src src3 = instruction->src[3];
128 			Src src4 = instruction->src[4];
129 
130 			bool predicate = instruction->predicate;
131 			Control control = instruction->control;
132 			bool integer = dst.type == Shader::PARAMETER_ADDR;
133 			bool pp = dst.partialPrecision;
134 
135 			Vector4f d;
136 			Vector4f s0;
137 			Vector4f s1;
138 			Vector4f s2;
139 			Vector4f s3;
140 			Vector4f s4;
141 
142 			if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
143 			if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
144 			if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
145 			if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
146 			if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
147 
148 			switch(opcode)
149 			{
150 			case Shader::OPCODE_VS_1_0:                                     break;
151 			case Shader::OPCODE_VS_1_1:                                     break;
152 			case Shader::OPCODE_VS_2_0:                                     break;
153 			case Shader::OPCODE_VS_2_x:                                     break;
154 			case Shader::OPCODE_VS_2_sw:                                    break;
155 			case Shader::OPCODE_VS_3_0:                                     break;
156 			case Shader::OPCODE_VS_3_sw:                                    break;
157 			case Shader::OPCODE_DCL:                                        break;
158 			case Shader::OPCODE_DEF:                                        break;
159 			case Shader::OPCODE_DEFI:                                       break;
160 			case Shader::OPCODE_DEFB:                                       break;
161 			case Shader::OPCODE_NOP:                                        break;
162 			case Shader::OPCODE_ABS:        abs(d, s0);                     break;
163 			case Shader::OPCODE_IABS:       iabs(d, s0);                    break;
164 			case Shader::OPCODE_ADD:        add(d, s0, s1);                 break;
165 			case Shader::OPCODE_IADD:       iadd(d, s0, s1);                break;
166 			case Shader::OPCODE_CRS:        crs(d, s0, s1);                 break;
167 			case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);        break;
168 			case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);        break;
169 			case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);        break;
170 			case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);        break;
171 			case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);            break;
172 			case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);            break;
173 			case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);            break;
174 			case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);            break;
175 			case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);      break;
176 			case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);      break;
177 			case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);      break;
178 			case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);      break;
179 			case Shader::OPCODE_DP1:        dp1(d, s0, s1);                 break;
180 			case Shader::OPCODE_DP2:        dp2(d, s0, s1);                 break;
181 			case Shader::OPCODE_DP3:        dp3(d, s0, s1);                 break;
182 			case Shader::OPCODE_DP4:        dp4(d, s0, s1);                 break;
183 			case Shader::OPCODE_DET2:       det2(d, s0, s1);                break;
184 			case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);            break;
185 			case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);        break;
186 			case Shader::OPCODE_ATT:        att(d, s0, s1);                 break;
187 			case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);               break;
188 			case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                break;
189 			case Shader::OPCODE_EXPP:       expp(d, s0, shaderModel);       break;
190 			case Shader::OPCODE_EXP:        exp(d, s0, pp);                 break;
191 			case Shader::OPCODE_FRC:        frc(d, s0);                     break;
192 			case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
193 			case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
194 			case Shader::OPCODE_ROUND:      round(d, s0);                   break;
195 			case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);               break;
196 			case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
197 			case Shader::OPCODE_LIT:        lit(d, s0);                     break;
198 			case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);               break;
199 			case Shader::OPCODE_LOG2:       log2(d, s0, pp);                break;
200 			case Shader::OPCODE_LOGP:       logp(d, s0, shaderModel);       break;
201 			case Shader::OPCODE_LOG:        log(d, s0, pp);                 break;
202 			case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);             break;
203 			case Shader::OPCODE_STEP:       step(d, s0, s1);                break;
204 			case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);          break;
205 			case Shader::OPCODE_ISINF:      isinf(d, s0);                   break;
206 			case Shader::OPCODE_ISNAN:      isnan(d, s0);                   break;
207 			case Shader::OPCODE_FLOATBITSTOINT:
208 			case Shader::OPCODE_FLOATBITSTOUINT:
209 			case Shader::OPCODE_INTBITSTOFLOAT:
210 			case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                    break;
211 			case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);      break;
212 			case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);      break;
213 			case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);       break;
214 			case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);    break;
215 			case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);    break;
216 			case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);     break;
217 			case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);              break;
218 			case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);              break;
219 			case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);              break;
220 			case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);              break;
221 			case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);              break;
222 			case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);             break;
223 			case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);            break;
224 			case Shader::OPCODE_MAX:        max(d, s0, s1);                 break;
225 			case Shader::OPCODE_IMAX:       imax(d, s0, s1);                break;
226 			case Shader::OPCODE_UMAX:       umax(d, s0, s1);                break;
227 			case Shader::OPCODE_MIN:        min(d, s0, s1);                 break;
228 			case Shader::OPCODE_IMIN:       imin(d, s0, s1);                break;
229 			case Shader::OPCODE_UMIN:       umin(d, s0, s1);                break;
230 			case Shader::OPCODE_MOV:        mov(d, s0, integer);            break;
231 			case Shader::OPCODE_MOVA:       mov(d, s0, true);               break;
232 			case Shader::OPCODE_NEG:        neg(d, s0);                     break;
233 			case Shader::OPCODE_INEG:       ineg(d, s0);                    break;
234 			case Shader::OPCODE_F2B:        f2b(d, s0);                     break;
235 			case Shader::OPCODE_B2F:        b2f(d, s0);                     break;
236 			case Shader::OPCODE_F2I:        f2i(d, s0);                     break;
237 			case Shader::OPCODE_I2F:        i2f(d, s0);                     break;
238 			case Shader::OPCODE_F2U:        f2u(d, s0);                     break;
239 			case Shader::OPCODE_U2F:        u2f(d, s0);                     break;
240 			case Shader::OPCODE_I2B:        i2b(d, s0);                     break;
241 			case Shader::OPCODE_B2I:        b2i(d, s0);                     break;
242 			case Shader::OPCODE_MUL:        mul(d, s0, s1);                 break;
243 			case Shader::OPCODE_IMUL:       imul(d, s0, s1);                break;
244 			case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                break;
245 			case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                break;
246 			case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                break;
247 			case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);            break;
248 			case Shader::OPCODE_POW:        pow(d, s0, s1, pp);             break;
249 			case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                break;
250 			case Shader::OPCODE_DIV:        div(d, s0, s1);                 break;
251 			case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                break;
252 			case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                break;
253 			case Shader::OPCODE_MOD:        mod(d, s0, s1);                 break;
254 			case Shader::OPCODE_IMOD:       imod(d, s0, s1);                break;
255 			case Shader::OPCODE_UMOD:       umod(d, s0, s1);                break;
256 			case Shader::OPCODE_SHL:        shl(d, s0, s1);                 break;
257 			case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                break;
258 			case Shader::OPCODE_USHR:       ushr(d, s0, s1);                break;
259 			case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                break;
260 			case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                break;
261 			case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                 break;
262 			case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);              break;
263 			case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);              break;
264 			case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);              break;
265 			case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);         break;
266 			case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);         break;
267 			case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);         break;
268 			case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);         break;
269 			case Shader::OPCODE_SGE:        step(d, s1, s0);                break;
270 			case Shader::OPCODE_SGN:        sgn(d, s0);                     break;
271 			case Shader::OPCODE_ISGN:       isgn(d, s0);                    break;
272 			case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);              break;
273 			case Shader::OPCODE_COS:        cos(d, s0, pp);                 break;
274 			case Shader::OPCODE_SIN:        sin(d, s0, pp);                 break;
275 			case Shader::OPCODE_TAN:        tan(d, s0);                     break;
276 			case Shader::OPCODE_ACOS:       acos(d, s0);                    break;
277 			case Shader::OPCODE_ASIN:       asin(d, s0);                    break;
278 			case Shader::OPCODE_ATAN:       atan(d, s0);                    break;
279 			case Shader::OPCODE_ATAN2:      atan2(d, s0, s1);               break;
280 			case Shader::OPCODE_COSH:       cosh(d, s0, pp);                break;
281 			case Shader::OPCODE_SINH:       sinh(d, s0, pp);                break;
282 			case Shader::OPCODE_TANH:       tanh(d, s0, pp);                break;
283 			case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);               break;
284 			case Shader::OPCODE_ASINH:      asinh(d, s0, pp);               break;
285 			case Shader::OPCODE_ATANH:      atanh(d, s0, pp);               break;
286 			case Shader::OPCODE_SLT:        slt(d, s0, s1);                 break;
287 			case Shader::OPCODE_SUB:        sub(d, s0, s1);                 break;
288 			case Shader::OPCODE_ISUB:       isub(d, s0, s1);                break;
289 			case Shader::OPCODE_BREAK:      BREAK();                        break;
290 			case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);        break;
291 			case Shader::OPCODE_BREAKP:     BREAKP(src0);                   break;
292 			case Shader::OPCODE_CONTINUE:   CONTINUE();                     break;
293 			case Shader::OPCODE_TEST:       TEST();                         break;
294 			case Shader::OPCODE_SCALAR:     SCALAR();                       break;
295 			case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);  break;
296 			case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0); break;
297 			case Shader::OPCODE_ELSE:       ELSE();                         break;
298 			case Shader::OPCODE_ENDIF:      ENDIF();                        break;
299 			case Shader::OPCODE_ENDLOOP:    ENDLOOP();                      break;
300 			case Shader::OPCODE_ENDREP:     ENDREP();                       break;
301 			case Shader::OPCODE_ENDWHILE:   ENDWHILE();                     break;
302 			case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                    break;
303 			case Shader::OPCODE_IF:         IF(src0);                       break;
304 			case Shader::OPCODE_IFC:        IFC(s0, s1, control);           break;
305 			case Shader::OPCODE_LABEL:      LABEL(dst.index);               break;
306 			case Shader::OPCODE_LOOP:       LOOP(src1);                     break;
307 			case Shader::OPCODE_REP:        REP(src0);                      break;
308 			case Shader::OPCODE_WHILE:      WHILE(src0);                    break;
309 			case Shader::OPCODE_SWITCH:     SWITCH();                       break;
310 			case Shader::OPCODE_RET:        RET();                          break;
311 			case Shader::OPCODE_LEAVE:      LEAVE();                        break;
312 			case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);        break;
313 			case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);       break;
314 			case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);       break;
315 			case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);          break;
316 			case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);         break;
317 			case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);      break;
318 			case Shader::OPCODE_ALL:        all(d.x, s0);                   break;
319 			case Shader::OPCODE_ANY:        any(d.x, s0);                   break;
320 			case Shader::OPCODE_NOT:        bitwise_not(d, s0);             break;
321 			case Shader::OPCODE_OR:         bitwise_or(d, s0, s1);          break;
322 			case Shader::OPCODE_XOR:        bitwise_xor(d, s0, s1);         break;
323 			case Shader::OPCODE_AND:        bitwise_and(d, s0, s1);         break;
324 			case Shader::OPCODE_EQ:         equal(d, s0, s1);               break;
325 			case Shader::OPCODE_NE:         notEqual(d, s0, s1);            break;
326 			case Shader::OPCODE_TEXLDL:     TEXLOD(d, s0, src1, s0.w);      break;
327 			case Shader::OPCODE_TEXLOD:     TEXLOD(d, s0, src1, s2.x);      break;
328 			case Shader::OPCODE_TEX:        TEX(d, s0, src1);               break;
329 			case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2);     break;
330 			case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break;
331 			case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x);  break;
332 			case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break;
333 			case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);   break;
334 			case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break;
335 			case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);         break;
336 			case Shader::OPCODE_END:                                        break;
337 			default:
338 				ASSERT(false);
339 			}
340 
341 			if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
342 			{
343 				if(dst.saturate)
344 				{
345 					if(dst.x) d.x = Max(d.x, Float4(0.0f));
346 					if(dst.y) d.y = Max(d.y, Float4(0.0f));
347 					if(dst.z) d.z = Max(d.z, Float4(0.0f));
348 					if(dst.w) d.w = Max(d.w, Float4(0.0f));
349 
350 					if(dst.x) d.x = Min(d.x, Float4(1.0f));
351 					if(dst.y) d.y = Min(d.y, Float4(1.0f));
352 					if(dst.z) d.z = Min(d.z, Float4(1.0f));
353 					if(dst.w) d.w = Min(d.w, Float4(1.0f));
354 				}
355 
356 				if(instruction->isPredicated())
357 				{
358 					Vector4f pDst;   // FIXME: Rename
359 
360 					switch(dst.type)
361 					{
362 					case Shader::PARAMETER_VOID: break;
363 					case Shader::PARAMETER_TEMP:
364 						if(dst.rel.type == Shader::PARAMETER_VOID)
365 						{
366 							if(dst.x) pDst.x = r[dst.index].x;
367 							if(dst.y) pDst.y = r[dst.index].y;
368 							if(dst.z) pDst.z = r[dst.index].z;
369 							if(dst.w) pDst.w = r[dst.index].w;
370 						}
371 						else if(!dst.rel.dynamic)
372 						{
373 							Int a = dst.index + relativeAddress(dst.rel);
374 
375 							if(dst.x) pDst.x = r[a].x;
376 							if(dst.y) pDst.y = r[a].y;
377 							if(dst.z) pDst.z = r[a].z;
378 							if(dst.w) pDst.w = r[a].w;
379 						}
380 						else
381 						{
382 							Int4 a = dst.index + dynamicAddress(dst.rel);
383 
384 							if(dst.x) pDst.x = r[a].x;
385 							if(dst.y) pDst.y = r[a].y;
386 							if(dst.z) pDst.z = r[a].z;
387 							if(dst.w) pDst.w = r[a].w;
388 						}
389 						break;
390 					case Shader::PARAMETER_ADDR: pDst = a0; break;
391 					case Shader::PARAMETER_RASTOUT:
392 						switch(dst.index)
393 						{
394 						case 0:
395 							if(dst.x) pDst.x = o[Pos].x;
396 							if(dst.y) pDst.y = o[Pos].y;
397 							if(dst.z) pDst.z = o[Pos].z;
398 							if(dst.w) pDst.w = o[Pos].w;
399 							break;
400 						case 1:
401 							pDst.x = o[Fog].x;
402 							break;
403 						case 2:
404 							pDst.x = o[Pts].y;
405 							break;
406 						default:
407 							ASSERT(false);
408 						}
409 						break;
410 					case Shader::PARAMETER_ATTROUT:
411 						if(dst.x) pDst.x = o[C0 + dst.index].x;
412 						if(dst.y) pDst.y = o[C0 + dst.index].y;
413 						if(dst.z) pDst.z = o[C0 + dst.index].z;
414 						if(dst.w) pDst.w = o[C0 + dst.index].w;
415 						break;
416 					case Shader::PARAMETER_TEXCRDOUT:
417 				//	case Shader::PARAMETER_OUTPUT:
418 						if(shaderModel < 0x0300)
419 						{
420 							if(dst.x) pDst.x = o[T0 + dst.index].x;
421 							if(dst.y) pDst.y = o[T0 + dst.index].y;
422 							if(dst.z) pDst.z = o[T0 + dst.index].z;
423 							if(dst.w) pDst.w = o[T0 + dst.index].w;
424 						}
425 						else if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
426 						{
427 							if(dst.x) pDst.x = o[dst.index].x;
428 							if(dst.y) pDst.y = o[dst.index].y;
429 							if(dst.z) pDst.z = o[dst.index].z;
430 							if(dst.w) pDst.w = o[dst.index].w;
431 						}
432 						else if(!dst.rel.dynamic)
433 						{
434 							Int a = dst.index + relativeAddress(dst.rel);
435 
436 							if(dst.x) pDst.x = o[a].x;
437 							if(dst.y) pDst.y = o[a].y;
438 							if(dst.z) pDst.z = o[a].z;
439 							if(dst.w) pDst.w = o[a].w;
440 						}
441 						else
442 						{
443 							Int4 a = dst.index + dynamicAddress(dst.rel);
444 
445 							if(dst.x) pDst.x = o[a].x;
446 							if(dst.y) pDst.y = o[a].y;
447 							if(dst.z) pDst.z = o[a].z;
448 							if(dst.w) pDst.w = o[a].w;
449 						}
450 						break;
451 					case Shader::PARAMETER_LABEL:                break;
452 					case Shader::PARAMETER_PREDICATE: pDst = p0; break;
453 					case Shader::PARAMETER_INPUT:                break;
454 					default:
455 						ASSERT(false);
456 					}
457 
458 					Int4 enable = enableMask(instruction);
459 
460 					Int4 xEnable = enable;
461 					Int4 yEnable = enable;
462 					Int4 zEnable = enable;
463 					Int4 wEnable = enable;
464 
465 					if(predicate)
466 					{
467 						unsigned char pSwizzle = instruction->predicateSwizzle;
468 
469 						Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
470 						Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
471 						Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
472 						Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
473 
474 						if(!instruction->predicateNot)
475 						{
476 							if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
477 							if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
478 							if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
479 							if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
480 						}
481 						else
482 						{
483 							if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
484 							if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
485 							if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
486 							if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
487 						}
488 					}
489 
490 					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
491 					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
492 					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
493 					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
494 
495 					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
496 					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
497 					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
498 					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
499 				}
500 
501 				switch(dst.type)
502 				{
503 				case Shader::PARAMETER_VOID:
504 					break;
505 				case Shader::PARAMETER_TEMP:
506 					if(dst.rel.type == Shader::PARAMETER_VOID)
507 					{
508 						if(dst.x) r[dst.index].x = d.x;
509 						if(dst.y) r[dst.index].y = d.y;
510 						if(dst.z) r[dst.index].z = d.z;
511 						if(dst.w) r[dst.index].w = d.w;
512 					}
513 					else if(!dst.rel.dynamic)
514 					{
515 						Int a = dst.index + relativeAddress(dst.rel);
516 
517 						if(dst.x) r[a].x = d.x;
518 						if(dst.y) r[a].y = d.y;
519 						if(dst.z) r[a].z = d.z;
520 						if(dst.w) r[a].w = d.w;
521 					}
522 					else
523 					{
524 						Int4 a = dst.index + dynamicAddress(dst.rel);
525 
526 						if(dst.x) r.scatter_x(a, d.x);
527 						if(dst.y) r.scatter_y(a, d.y);
528 						if(dst.z) r.scatter_z(a, d.z);
529 						if(dst.w) r.scatter_w(a, d.w);
530 					}
531 					break;
532 				case Shader::PARAMETER_ADDR:
533 					if(dst.x) a0.x = d.x;
534 					if(dst.y) a0.y = d.y;
535 					if(dst.z) a0.z = d.z;
536 					if(dst.w) a0.w = d.w;
537 					break;
538 				case Shader::PARAMETER_RASTOUT:
539 					switch(dst.index)
540 					{
541 					case 0:
542 						if(dst.x) o[Pos].x = d.x;
543 						if(dst.y) o[Pos].y = d.y;
544 						if(dst.z) o[Pos].z = d.z;
545 						if(dst.w) o[Pos].w = d.w;
546 						break;
547 					case 1:
548 						o[Fog].x = d.x;
549 						break;
550 					case 2:
551 						o[Pts].y = d.x;
552 						break;
553 					default:	ASSERT(false);
554 					}
555 					break;
556 				case Shader::PARAMETER_ATTROUT:
557 					if(dst.x) o[C0 + dst.index].x = d.x;
558 					if(dst.y) o[C0 + dst.index].y = d.y;
559 					if(dst.z) o[C0 + dst.index].z = d.z;
560 					if(dst.w) o[C0 + dst.index].w = d.w;
561 					break;
562 				case Shader::PARAMETER_TEXCRDOUT:
563 			//	case Shader::PARAMETER_OUTPUT:
564 					if(shaderModel < 0x0300)
565 					{
566 						if(dst.x) o[T0 + dst.index].x = d.x;
567 						if(dst.y) o[T0 + dst.index].y = d.y;
568 						if(dst.z) o[T0 + dst.index].z = d.z;
569 						if(dst.w) o[T0 + dst.index].w = d.w;
570 					}
571 					else if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
572 					{
573 						if(dst.x) o[dst.index].x = d.x;
574 						if(dst.y) o[dst.index].y = d.y;
575 						if(dst.z) o[dst.index].z = d.z;
576 						if(dst.w) o[dst.index].w = d.w;
577 					}
578 					else if(!dst.rel.dynamic)
579 					{
580 						Int a = dst.index + relativeAddress(dst.rel);
581 
582 						if(dst.x) o[a].x = d.x;
583 						if(dst.y) o[a].y = d.y;
584 						if(dst.z) o[a].z = d.z;
585 						if(dst.w) o[a].w = d.w;
586 					}
587 					else
588 					{
589 						Int4 a = dst.index + dynamicAddress(dst.rel);
590 
591 						if(dst.x) o.scatter_x(a, d.x);
592 						if(dst.y) o.scatter_y(a, d.y);
593 						if(dst.z) o.scatter_z(a, d.z);
594 						if(dst.w) o.scatter_w(a, d.w);
595 					}
596 					break;
597 				case Shader::PARAMETER_LABEL:             break;
598 				case Shader::PARAMETER_PREDICATE: p0 = d; break;
599 				case Shader::PARAMETER_INPUT:             break;
600 				default:
601 					ASSERT(false);
602 				}
603 			}
604 		}
605 
606 		if(currentLabel != -1)
607 		{
608 			Nucleus::setInsertBlock(returnBlock);
609 		}
610 	}
611 
passThrough()612 	void VertexProgram::passThrough()
613 	{
614 		if(shader)
615 		{
616 			for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
617 			{
618 				unsigned char usage = shader->getOutput(i, 0).usage;
619 
620 				switch(usage)
621 				{
622 				case 0xFF:
623 					continue;
624 				case Shader::USAGE_PSIZE:
625 					o[i].y = v[i].x;
626 					break;
627 				case Shader::USAGE_TEXCOORD:
628 					o[i].x = v[i].x;
629 					o[i].y = v[i].y;
630 					o[i].z = v[i].z;
631 					o[i].w = v[i].w;
632 					break;
633 				case Shader::USAGE_POSITION:
634 					o[i].x = v[i].x;
635 					o[i].y = v[i].y;
636 					o[i].z = v[i].z;
637 					o[i].w = v[i].w;
638 					break;
639 				case Shader::USAGE_COLOR:
640 					o[i].x = v[i].x;
641 					o[i].y = v[i].y;
642 					o[i].z = v[i].z;
643 					o[i].w = v[i].w;
644 					break;
645 				case Shader::USAGE_FOG:
646 					o[i].x = v[i].x;
647 					break;
648 				default:
649 					ASSERT(false);
650 				}
651 			}
652 		}
653 		else
654 		{
655 			o[Pos].x = v[PositionT].x;
656 			o[Pos].y = v[PositionT].y;
657 			o[Pos].z = v[PositionT].z;
658 			o[Pos].w = v[PositionT].w;
659 
660 			for(int i = 0; i < 2; i++)
661 			{
662 				o[C0 + i].x = v[Color0 + i].x;
663 				o[C0 + i].y = v[Color0 + i].y;
664 				o[C0 + i].z = v[Color0 + i].z;
665 				o[C0 + i].w = v[Color0 + i].w;
666 			}
667 
668 			for(int i = 0; i < 8; i++)
669 			{
670 				o[T0 + i].x = v[TexCoord0 + i].x;
671 				o[T0 + i].y = v[TexCoord0 + i].y;
672 				o[T0 + i].z = v[TexCoord0 + i].z;
673 				o[T0 + i].w = v[TexCoord0 + i].w;
674 			}
675 
676 			o[Pts].y = v[PointSize].x;
677 		}
678 	}
679 
fetchRegister(const Src & src,unsigned int offset)680 	Vector4f VertexProgram::fetchRegister(const Src &src, unsigned int offset)
681 	{
682 		Vector4f reg;
683 		unsigned int i = src.index + offset;
684 
685 		switch(src.type)
686 		{
687 		case Shader::PARAMETER_TEMP:
688 			if(src.rel.type == Shader::PARAMETER_VOID)
689 			{
690 				reg = r[i];
691 			}
692 			else if(!src.rel.dynamic)
693 			{
694 				reg = r[i + relativeAddress(src.rel, src.bufferIndex)];
695 			}
696 			else
697 			{
698 				reg = r[i + dynamicAddress(src.rel)];
699 			}
700 			break;
701 		case Shader::PARAMETER_CONST:
702 			reg = readConstant(src, offset);
703 			break;
704 		case Shader::PARAMETER_INPUT:
705 			if(src.rel.type == Shader::PARAMETER_VOID)
706 			{
707 				reg = v[i];
708 			}
709 			else if(!src.rel.dynamic)
710 			{
711 				reg = v[i + relativeAddress(src.rel, src.bufferIndex)];
712 			}
713 			else
714 			{
715 				reg = v[i + dynamicAddress(src.rel)];
716 			}
717 			break;
718 		case Shader::PARAMETER_VOID: return r[0];   // Dummy
719 		case Shader::PARAMETER_FLOAT4LITERAL:
720 			reg.x = Float4(src.value[0]);
721 			reg.y = Float4(src.value[1]);
722 			reg.z = Float4(src.value[2]);
723 			reg.w = Float4(src.value[3]);
724 			break;
725 		case Shader::PARAMETER_ADDR:      reg = a0; break;
726 		case Shader::PARAMETER_CONSTBOOL: return r[0];   // Dummy
727 		case Shader::PARAMETER_CONSTINT:  return r[0];   // Dummy
728 		case Shader::PARAMETER_LOOP:      return r[0];   // Dummy
729 		case Shader::PARAMETER_PREDICATE: return r[0];   // Dummy
730 		case Shader::PARAMETER_SAMPLER:
731 			if(src.rel.type == Shader::PARAMETER_VOID)
732 			{
733 				reg.x = As<Float4>(Int4(i));
734 			}
735 			else if(src.rel.type == Shader::PARAMETER_TEMP)
736 			{
737 				reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
738 			}
739 			return reg;
740 		case Shader::PARAMETER_OUTPUT:
741 			if(src.rel.type == Shader::PARAMETER_VOID)
742 			{
743 				reg = o[i];
744 			}
745 			else if(!src.rel.dynamic)
746 			{
747 				reg = o[i + relativeAddress(src.rel, src.bufferIndex)];
748 			}
749 			else
750 			{
751 				reg = o[i + dynamicAddress(src.rel)];
752 			}
753 			break;
754 		case Shader::PARAMETER_MISCTYPE:
755 			if(src.index == Shader::InstanceIDIndex)
756 			{
757 				reg.x = As<Float>(instanceID);
758 			}
759 			else if(src.index == Shader::VertexIDIndex)
760 			{
761 				reg.x = As<Float4>(vertexID);
762 			}
763 			else ASSERT(false);
764 			return reg;
765 		default:
766 			ASSERT(false);
767 		}
768 
769 		const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
770 		const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
771 		const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
772 		const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
773 
774 		Vector4f mod;
775 
776 		switch(src.modifier)
777 		{
778 		case Shader::MODIFIER_NONE:
779 			mod.x = x;
780 			mod.y = y;
781 			mod.z = z;
782 			mod.w = w;
783 			break;
784 		case Shader::MODIFIER_NEGATE:
785 			mod.x = -x;
786 			mod.y = -y;
787 			mod.z = -z;
788 			mod.w = -w;
789 			break;
790 		case Shader::MODIFIER_ABS:
791 			mod.x = Abs(x);
792 			mod.y = Abs(y);
793 			mod.z = Abs(z);
794 			mod.w = Abs(w);
795 			break;
796 		case Shader::MODIFIER_ABS_NEGATE:
797 			mod.x = -Abs(x);
798 			mod.y = -Abs(y);
799 			mod.z = -Abs(z);
800 			mod.w = -Abs(w);
801 			break;
802 		case Shader::MODIFIER_NOT:
803 			mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
804 			mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
805 			mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
806 			mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
807 			break;
808 		default:
809 			ASSERT(false);
810 		}
811 
812 		return mod;
813 	}
814 
uniformAddress(int bufferIndex,unsigned int index)815 	RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index)
816 	{
817 		if(bufferIndex == -1)
818 		{
819 			return data + OFFSET(DrawData, vs.c[index]);
820 		}
821 		else
822 		{
823 			return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.u[bufferIndex])) + index;
824 		}
825 	}
826 
uniformAddress(int bufferIndex,unsigned int index,Int & offset)827 	RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int &offset)
828 	{
829 		return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
830 	}
831 
readConstant(const Src & src,unsigned int offset)832 	Vector4f VertexProgram::readConstant(const Src &src, unsigned int offset)
833 	{
834 		Vector4f c;
835 		unsigned int i = src.index + offset;
836 
837 		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
838 		{
839 			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
840 
841 			c.x = c.x.xxxx;
842 			c.y = c.y.yyyy;
843 			c.z = c.z.zzzz;
844 			c.w = c.w.wwww;
845 
846 			if(shader->containsDefineInstruction())   // Constant may be known at compile time
847 			{
848 				for(size_t j = 0; j < shader->getLength(); j++)
849 				{
850 					const Shader::Instruction &instruction = *shader->getInstruction(j);
851 
852 					if(instruction.opcode == Shader::OPCODE_DEF)
853 					{
854 						if(instruction.dst.index == i)
855 						{
856 							c.x = Float4(instruction.src[0].value[0]);
857 							c.y = Float4(instruction.src[0].value[1]);
858 							c.z = Float4(instruction.src[0].value[2]);
859 							c.w = Float4(instruction.src[0].value[3]);
860 
861 							break;
862 						}
863 					}
864 				}
865 			}
866 		}
867 		else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP)
868 		{
869 			Int a = relativeAddress(src.rel, src.bufferIndex);
870 
871 			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
872 
873 			c.x = c.x.xxxx;
874 			c.y = c.y.yyyy;
875 			c.z = c.z.zzzz;
876 			c.w = c.w.wwww;
877 		}
878 		else
879 		{
880 			int component = src.rel.swizzle & 0x03;
881 			Float4 a;
882 
883 			switch(src.rel.type)
884 			{
885 			case Shader::PARAMETER_ADDR:     a = a0[component]; break;
886 			case Shader::PARAMETER_TEMP:     a = r[src.rel.index][component]; break;
887 			case Shader::PARAMETER_INPUT:    a = v[src.rel.index][component]; break;
888 			case Shader::PARAMETER_OUTPUT:   a = o[src.rel.index][component]; break;
889 			case Shader::PARAMETER_CONST:    a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
890 			case Shader::PARAMETER_MISCTYPE:
891 				switch(src.rel.index)
892 				{
893 				case Shader::InstanceIDIndex: a = As<Float4>(Int4(instanceID)); break;
894 				case Shader::VertexIDIndex:   a = As<Float4>(vertexID);         break;
895 				default: ASSERT(false);
896 				}
897 				break;
898 			default: ASSERT(false);
899 			}
900 
901 			Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
902 
903 			index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS));   // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
904 
905 			Int index0 = Extract(index, 0);
906 			Int index1 = Extract(index, 1);
907 			Int index2 = Extract(index, 2);
908 			Int index3 = Extract(index, 3);
909 
910 			c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
911 			c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
912 			c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
913 			c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
914 
915 			transpose4x4(c.x, c.y, c.z, c.w);
916 		}
917 
918 		return c;
919 	}
920 
relativeAddress(const Shader::Relative & rel,int bufferIndex)921 	Int VertexProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex)
922 	{
923 		ASSERT(!rel.dynamic);
924 
925 		if(rel.type == Shader::PARAMETER_TEMP)
926 		{
927 			return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale;
928 		}
929 		else if(rel.type == Shader::PARAMETER_INPUT)
930 		{
931 			return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale;
932 		}
933 		else if(rel.type == Shader::PARAMETER_OUTPUT)
934 		{
935 			return As<Int>(Extract(o[rel.index].x, 0)) * rel.scale;
936 		}
937 		else if(rel.type == Shader::PARAMETER_CONST)
938 		{
939 			return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale;
940 		}
941 		else if(rel.type == Shader::PARAMETER_LOOP)
942 		{
943 			return aL[loopDepth];
944 		}
945 		else ASSERT(false);
946 
947 		return 0;
948 	}
949 
dynamicAddress(const Shader::Relative & rel)950 	Int4 VertexProgram::dynamicAddress(const Shader::Relative &rel)
951 	{
952 		int component = rel.swizzle & 0x03;
953 		Float4 a;
954 
955 		switch(rel.type)
956 		{
957 		case Shader::PARAMETER_ADDR:     a = a0[component]; break;
958 		case Shader::PARAMETER_TEMP:     a = r[rel.index][component]; break;
959 		case Shader::PARAMETER_INPUT:    a = v[rel.index][component]; break;
960 		case Shader::PARAMETER_OUTPUT:   a = o[rel.index][component]; break;
961 		case Shader::PARAMETER_MISCTYPE:
962 			switch(rel.index)
963 			{
964 			case Shader::InstanceIDIndex: a = As<Float>(instanceID); break;
965 			case Shader::VertexIDIndex:   a = As<Float4>(vertexID);  break;
966 			default: ASSERT(false);
967 			}
968 			break;
969 		default: ASSERT(false);
970 		}
971 
972 		return As<Int4>(a) * Int4(rel.scale);
973 	}
974 
enableMask(const Shader::Instruction * instruction)975 	Int4 VertexProgram::enableMask(const Shader::Instruction *instruction)
976 	{
977 		if(scalar)
978 		{
979 			return Int4(0xFFFFFFFF);
980 		}
981 
982 		Int4 enable = instruction->analysisBranch ? Int4(enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]) : Int4(0xFFFFFFFF);
983 
984 		if(shader->containsBreakInstruction() && instruction->analysisBreak)
985 		{
986 			enable &= enableBreak;
987 		}
988 
989 		if(shader->containsContinueInstruction() && instruction->analysisContinue)
990 		{
991 			enable &= enableContinue;
992 		}
993 
994 		if(shader->containsLeaveInstruction() && instruction->analysisLeave)
995 		{
996 			enable &= enableLeave;
997 		}
998 
999 		return enable;
1000 	}
1001 
M3X2(Vector4f & dst,Vector4f & src0,Src & src1)1002 	void VertexProgram::M3X2(Vector4f &dst, Vector4f &src0, Src &src1)
1003 	{
1004 		Vector4f row0 = fetchRegister(src1, 0);
1005 		Vector4f row1 = fetchRegister(src1, 1);
1006 
1007 		dst.x = dot3(src0, row0);
1008 		dst.y = dot3(src0, row1);
1009 	}
1010 
M3X3(Vector4f & dst,Vector4f & src0,Src & src1)1011 	void VertexProgram::M3X3(Vector4f &dst, Vector4f &src0, Src &src1)
1012 	{
1013 		Vector4f row0 = fetchRegister(src1, 0);
1014 		Vector4f row1 = fetchRegister(src1, 1);
1015 		Vector4f row2 = fetchRegister(src1, 2);
1016 
1017 		dst.x = dot3(src0, row0);
1018 		dst.y = dot3(src0, row1);
1019 		dst.z = dot3(src0, row2);
1020 	}
1021 
M3X4(Vector4f & dst,Vector4f & src0,Src & src1)1022 	void VertexProgram::M3X4(Vector4f &dst, Vector4f &src0, Src &src1)
1023 	{
1024 		Vector4f row0 = fetchRegister(src1, 0);
1025 		Vector4f row1 = fetchRegister(src1, 1);
1026 		Vector4f row2 = fetchRegister(src1, 2);
1027 		Vector4f row3 = fetchRegister(src1, 3);
1028 
1029 		dst.x = dot3(src0, row0);
1030 		dst.y = dot3(src0, row1);
1031 		dst.z = dot3(src0, row2);
1032 		dst.w = dot3(src0, row3);
1033 	}
1034 
M4X3(Vector4f & dst,Vector4f & src0,Src & src1)1035 	void VertexProgram::M4X3(Vector4f &dst, Vector4f &src0, Src &src1)
1036 	{
1037 		Vector4f row0 = fetchRegister(src1, 0);
1038 		Vector4f row1 = fetchRegister(src1, 1);
1039 		Vector4f row2 = fetchRegister(src1, 2);
1040 
1041 		dst.x = dot4(src0, row0);
1042 		dst.y = dot4(src0, row1);
1043 		dst.z = dot4(src0, row2);
1044 	}
1045 
M4X4(Vector4f & dst,Vector4f & src0,Src & src1)1046 	void VertexProgram::M4X4(Vector4f &dst, Vector4f &src0, Src &src1)
1047 	{
1048 		Vector4f row0 = fetchRegister(src1, 0);
1049 		Vector4f row1 = fetchRegister(src1, 1);
1050 		Vector4f row2 = fetchRegister(src1, 2);
1051 		Vector4f row3 = fetchRegister(src1, 3);
1052 
1053 		dst.x = dot4(src0, row0);
1054 		dst.y = dot4(src0, row1);
1055 		dst.z = dot4(src0, row2);
1056 		dst.w = dot4(src0, row3);
1057 	}
1058 
BREAK()1059 	void VertexProgram::BREAK()
1060 	{
1061 		enableBreak = enableBreak & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1062 	}
1063 
BREAKC(Vector4f & src0,Vector4f & src1,Control control)1064 	void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1065 	{
1066 		Int4 condition;
1067 
1068 		switch(control)
1069 		{
1070 		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1071 		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1072 		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1073 		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1074 		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1075 		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1076 		default:
1077 			ASSERT(false);
1078 		}
1079 
1080 		BREAK(condition);
1081 	}
1082 
BREAKP(const Src & predicateRegister)1083 	void VertexProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1084 	{
1085 		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1086 
1087 		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1088 		{
1089 			condition = ~condition;
1090 		}
1091 
1092 		BREAK(condition);
1093 	}
1094 
BREAK(Int4 & condition)1095 	void VertexProgram::BREAK(Int4 &condition)
1096 	{
1097 		condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1098 
1099 		enableBreak = enableBreak & ~condition;
1100 	}
1101 
CONTINUE()1102 	void VertexProgram::CONTINUE()
1103 	{
1104 		enableContinue = enableContinue & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1105 	}
1106 
TEST()1107 	void VertexProgram::TEST()
1108 	{
1109 		enableContinue = restoreContinue.back();
1110 		restoreContinue.pop_back();
1111 	}
1112 
SCALAR()1113 	void VertexProgram::SCALAR()
1114 	{
1115 		scalar = true;
1116 	}
1117 
CALL(int labelIndex,int callSiteIndex)1118 	void VertexProgram::CALL(int labelIndex, int callSiteIndex)
1119 	{
1120 		if(!labelBlock[labelIndex])
1121 		{
1122 			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1123 		}
1124 
1125 		if(callRetBlock[labelIndex].size() > 1)
1126 		{
1127 			callStack[Min(stackIndex++, Int(MAX_SHADER_CALL_STACK_SIZE))] = UInt(callSiteIndex);
1128 		}
1129 
1130 		Int4 restoreLeave = enableLeave;
1131 
1132 		Nucleus::createBr(labelBlock[labelIndex]);
1133 		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1134 
1135 		enableLeave = restoreLeave;
1136 	}
1137 
CALLNZ(int labelIndex,int callSiteIndex,const Src & src)1138 	void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
1139 	{
1140 		if(src.type == Shader::PARAMETER_CONSTBOOL)
1141 		{
1142 			CALLNZb(labelIndex, callSiteIndex, src);
1143 		}
1144 		else if(src.type == Shader::PARAMETER_PREDICATE)
1145 		{
1146 			CALLNZp(labelIndex, callSiteIndex, src);
1147 		}
1148 		else ASSERT(false);
1149 	}
1150 
CALLNZb(int labelIndex,int callSiteIndex,const Src & boolRegister)1151 	void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
1152 	{
1153 		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1154 
1155 		if(boolRegister.modifier == Shader::MODIFIER_NOT)
1156 		{
1157 			condition = !condition;
1158 		}
1159 
1160 		if(!labelBlock[labelIndex])
1161 		{
1162 			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1163 		}
1164 
1165 		if(callRetBlock[labelIndex].size() > 1)
1166 		{
1167 			callStack[Min(stackIndex++, Int(MAX_SHADER_CALL_STACK_SIZE))] = UInt(callSiteIndex);
1168 		}
1169 
1170 		Int4 restoreLeave = enableLeave;
1171 
1172 		branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1173 		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1174 
1175 		enableLeave = restoreLeave;
1176 	}
1177 
CALLNZp(int labelIndex,int callSiteIndex,const Src & predicateRegister)1178 	void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
1179 	{
1180 		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1181 
1182 		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1183 		{
1184 			condition = ~condition;
1185 		}
1186 
1187 		condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1188 
1189 		if(!labelBlock[labelIndex])
1190 		{
1191 			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1192 		}
1193 
1194 		if(callRetBlock[labelIndex].size() > 1)
1195 		{
1196 			callStack[Min(stackIndex++, Int(MAX_SHADER_CALL_STACK_SIZE))] = UInt(callSiteIndex);
1197 		}
1198 
1199 		enableIndex++;
1200 		enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition;
1201 		Int4 restoreLeave = enableLeave;
1202 
1203 		Bool notAllFalse = SignMask(condition) != 0;
1204 		branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1205 		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1206 
1207 		enableIndex--;
1208 		enableLeave = restoreLeave;
1209 	}
1210 
ELSE()1211 	void VertexProgram::ELSE()
1212 	{
1213 		ifDepth--;
1214 
1215 		BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1216 		BasicBlock *endBlock = Nucleus::createBasicBlock();
1217 
1218 		if(isConditionalIf[ifDepth])
1219 		{
1220 			Int4 condition = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1221 			Bool notAllFalse = SignMask(condition) != 0;
1222 
1223 			branch(notAllFalse, falseBlock, endBlock);
1224 
1225 			enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1226 		}
1227 		else
1228 		{
1229 			Nucleus::createBr(endBlock);
1230 			Nucleus::setInsertBlock(falseBlock);
1231 		}
1232 
1233 		ifFalseBlock[ifDepth] = endBlock;
1234 
1235 		ifDepth++;
1236 	}
1237 
ENDIF()1238 	void VertexProgram::ENDIF()
1239 	{
1240 		ifDepth--;
1241 
1242 		BasicBlock *endBlock = ifFalseBlock[ifDepth];
1243 
1244 		Nucleus::createBr(endBlock);
1245 		Nucleus::setInsertBlock(endBlock);
1246 
1247 		if(isConditionalIf[ifDepth])
1248 		{
1249 			enableIndex--;
1250 		}
1251 	}
1252 
ENDLOOP()1253 	void VertexProgram::ENDLOOP()
1254 	{
1255 		loopRepDepth--;
1256 
1257 		aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1258 
1259 		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1260 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1261 
1262 		Nucleus::createBr(testBlock);
1263 		Nucleus::setInsertBlock(endBlock);
1264 
1265 		loopDepth--;
1266 		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1267 	}
1268 
ENDREP()1269 	void VertexProgram::ENDREP()
1270 	{
1271 		loopRepDepth--;
1272 
1273 		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1274 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1275 
1276 		Nucleus::createBr(testBlock);
1277 		Nucleus::setInsertBlock(endBlock);
1278 
1279 		loopDepth--;
1280 		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1281 	}
1282 
ENDWHILE()1283 	void VertexProgram::ENDWHILE()
1284 	{
1285 		loopRepDepth--;
1286 
1287 		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1288 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1289 
1290 		Nucleus::createBr(testBlock);
1291 		Nucleus::setInsertBlock(endBlock);
1292 
1293 		enableIndex--;
1294 		scalar = false;
1295 	}
1296 
ENDSWITCH()1297 	void VertexProgram::ENDSWITCH()
1298 	{
1299 		loopRepDepth--;
1300 
1301 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1302 
1303 		Nucleus::createBr(endBlock);
1304 		Nucleus::setInsertBlock(endBlock);
1305 	}
1306 
IF(const Src & src)1307 	void VertexProgram::IF(const Src &src)
1308 	{
1309 		if(src.type == Shader::PARAMETER_CONSTBOOL)
1310 		{
1311 			IFb(src);
1312 		}
1313 		else if(src.type == Shader::PARAMETER_PREDICATE)
1314 		{
1315 			IFp(src);
1316 		}
1317 		else
1318 		{
1319 			Int4 condition = As<Int4>(fetchRegister(src).x);
1320 			IF(condition);
1321 		}
1322 	}
1323 
IFb(const Src & boolRegister)1324 	void VertexProgram::IFb(const Src &boolRegister)
1325 	{
1326 		ASSERT(ifDepth < 24 + 4);
1327 
1328 		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1329 
1330 		if(boolRegister.modifier == Shader::MODIFIER_NOT)
1331 		{
1332 			condition = !condition;
1333 		}
1334 
1335 		BasicBlock *trueBlock = Nucleus::createBasicBlock();
1336 		BasicBlock *falseBlock = Nucleus::createBasicBlock();
1337 
1338 		branch(condition, trueBlock, falseBlock);
1339 
1340 		isConditionalIf[ifDepth] = false;
1341 		ifFalseBlock[ifDepth] = falseBlock;
1342 
1343 		ifDepth++;
1344 	}
1345 
IFp(const Src & predicateRegister)1346 	void VertexProgram::IFp(const Src &predicateRegister)
1347 	{
1348 		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1349 
1350 		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1351 		{
1352 			condition = ~condition;
1353 		}
1354 
1355 		IF(condition);
1356 	}
1357 
IFC(Vector4f & src0,Vector4f & src1,Control control)1358 	void VertexProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
1359 	{
1360 		Int4 condition;
1361 
1362 		switch(control)
1363 		{
1364 		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1365 		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1366 		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1367 		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1368 		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1369 		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1370 		default:
1371 			ASSERT(false);
1372 		}
1373 
1374 		IF(condition);
1375 	}
1376 
IF(Int4 & condition)1377 	void VertexProgram::IF(Int4 &condition)
1378 	{
1379 		condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1380 
1381 		enableIndex++;
1382 		enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition;
1383 
1384 		BasicBlock *trueBlock = Nucleus::createBasicBlock();
1385 		BasicBlock *falseBlock = Nucleus::createBasicBlock();
1386 
1387 		Bool notAllFalse = SignMask(condition) != 0;
1388 
1389 		branch(notAllFalse, trueBlock, falseBlock);
1390 
1391 		isConditionalIf[ifDepth] = true;
1392 		ifFalseBlock[ifDepth] = falseBlock;
1393 
1394 		ifDepth++;
1395 	}
1396 
LABEL(int labelIndex)1397 	void VertexProgram::LABEL(int labelIndex)
1398 	{
1399 		if(!labelBlock[labelIndex])
1400 		{
1401 			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1402 		}
1403 
1404 		Nucleus::setInsertBlock(labelBlock[labelIndex]);
1405 		currentLabel = labelIndex;
1406 	}
1407 
LOOP(const Src & integerRegister)1408 	void VertexProgram::LOOP(const Src &integerRegister)
1409 	{
1410 		loopDepth++;
1411 
1412 		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1413 		aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
1414 		increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
1415 
1416 		// FIXME: Compiles to two instructions?
1417 		If(increment[loopDepth] == 0)
1418 		{
1419 			increment[loopDepth] = 1;
1420 		}
1421 
1422 		BasicBlock *loopBlock = Nucleus::createBasicBlock();
1423 		BasicBlock *testBlock = Nucleus::createBasicBlock();
1424 		BasicBlock *endBlock = Nucleus::createBasicBlock();
1425 
1426 		loopRepTestBlock[loopRepDepth] = testBlock;
1427 		loopRepEndBlock[loopRepDepth] = endBlock;
1428 
1429 		// FIXME: jump(testBlock)
1430 		Nucleus::createBr(testBlock);
1431 		Nucleus::setInsertBlock(testBlock);
1432 
1433 		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1434 		Nucleus::setInsertBlock(loopBlock);
1435 
1436 		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1437 
1438 		loopRepDepth++;
1439 	}
1440 
REP(const Src & integerRegister)1441 	void VertexProgram::REP(const Src &integerRegister)
1442 	{
1443 		loopDepth++;
1444 
1445 		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1446 		aL[loopDepth] = aL[loopDepth - 1];
1447 
1448 		BasicBlock *loopBlock = Nucleus::createBasicBlock();
1449 		BasicBlock *testBlock = Nucleus::createBasicBlock();
1450 		BasicBlock *endBlock = Nucleus::createBasicBlock();
1451 
1452 		loopRepTestBlock[loopRepDepth] = testBlock;
1453 		loopRepEndBlock[loopRepDepth] = endBlock;
1454 
1455 		// FIXME: jump(testBlock)
1456 		Nucleus::createBr(testBlock);
1457 		Nucleus::setInsertBlock(testBlock);
1458 
1459 		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1460 		Nucleus::setInsertBlock(loopBlock);
1461 
1462 		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1463 
1464 		loopRepDepth++;
1465 	}
1466 
WHILE(const Src & temporaryRegister)1467 	void VertexProgram::WHILE(const Src &temporaryRegister)
1468 	{
1469 		enableIndex++;
1470 
1471 		BasicBlock *loopBlock = Nucleus::createBasicBlock();
1472 		BasicBlock *testBlock = Nucleus::createBasicBlock();
1473 		BasicBlock *endBlock = Nucleus::createBasicBlock();
1474 
1475 		loopRepTestBlock[loopRepDepth] = testBlock;
1476 		loopRepEndBlock[loopRepDepth] = endBlock;
1477 
1478 		Int4 restoreBreak = enableBreak;
1479 		restoreContinue.push_back(enableContinue);
1480 
1481 		// TODO: jump(testBlock)
1482 		Nucleus::createBr(testBlock);
1483 		Nucleus::setInsertBlock(testBlock);
1484 
1485 		const Vector4f &src = fetchRegister(temporaryRegister);
1486 		Int4 condition = As<Int4>(src.x);
1487 		condition &= enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1488 		if(shader->containsLeaveInstruction()) condition &= enableLeave;
1489 		if(shader->containsBreakInstruction()) condition &= enableBreak;
1490 		enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition;
1491 
1492 		Bool notAllFalse = SignMask(condition) != 0;
1493 		branch(notAllFalse, loopBlock, endBlock);
1494 
1495 		Nucleus::setInsertBlock(endBlock);
1496 		enableBreak = restoreBreak;
1497 
1498 		Nucleus::setInsertBlock(loopBlock);
1499 
1500 		loopRepDepth++;
1501 		scalar = false;
1502 	}
1503 
SWITCH()1504 	void VertexProgram::SWITCH()
1505 	{
1506 		BasicBlock *endBlock = Nucleus::createBasicBlock();
1507 
1508 		loopRepTestBlock[loopRepDepth] = nullptr;
1509 		loopRepEndBlock[loopRepDepth] = endBlock;
1510 
1511 		Int4 restoreBreak = enableBreak;
1512 
1513 		BasicBlock *currentBlock = Nucleus::getInsertBlock();
1514 
1515 		Nucleus::setInsertBlock(endBlock);
1516 		enableBreak = restoreBreak;
1517 
1518 		Nucleus::setInsertBlock(currentBlock);
1519 
1520 		loopRepDepth++;
1521 	}
1522 
RET()1523 	void VertexProgram::RET()
1524 	{
1525 		if(currentLabel == -1)
1526 		{
1527 			returnBlock = Nucleus::createBasicBlock();
1528 			Nucleus::createBr(returnBlock);
1529 		}
1530 		else
1531 		{
1532 			BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1533 
1534 			if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1535 			{
1536 				// FIXME: Encapsulate
1537 				UInt index = callStack[--stackIndex];
1538 
1539 				Value *value = index.loadValue();
1540 				SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1541 
1542 				for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1543 				{
1544 					Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
1545 				}
1546 			}
1547 			else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1548 			{
1549 				Nucleus::createBr(callRetBlock[currentLabel][0]);
1550 			}
1551 			else   // Function isn't called
1552 			{
1553 				Nucleus::createBr(unreachableBlock);
1554 			}
1555 
1556 			Nucleus::setInsertBlock(unreachableBlock);
1557 			Nucleus::createUnreachable();
1558 		}
1559 	}
1560 
LEAVE()1561 	void VertexProgram::LEAVE()
1562 	{
1563 		enableLeave = enableLeave & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
1564 
1565 		// FIXME: Return from function if all instances left
1566 		// FIXME: Use enableLeave in other control-flow constructs
1567 	}
1568 
TEX(Vector4f & dst,Vector4f & src0,const Src & src1)1569 	void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1)
1570 	{
1571 		dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), (src0), Base);
1572 	}
1573 
TEXOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset)1574 	void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)
1575 	{
1576 		dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Base, Offset});
1577 	}
1578 
TEXLOD(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & lod)1579 	void VertexProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
1580 	{
1581 		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod);
1582 	}
1583 
TEXLODOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & lod)1584 	void VertexProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
1585 	{
1586 		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset});
1587 	}
1588 
TEXELFETCH(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & lod)1589 	void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
1590 	{
1591 		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch);
1592 	}
1593 
TEXELFETCHOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & lod)1594 	void VertexProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
1595 	{
1596 		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset});
1597 	}
1598 
TEXGRAD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & dsx,Vector4f & dsy)1599 	void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy)
1600 	{
1601 		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, src0, Grad);
1602 	}
1603 
TEXGRADOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & dsx,Vector4f & dsy,Vector4f & offset)1604 	void VertexProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset)
1605 	{
1606 		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset});
1607 	}
1608 
TEXSIZE(Vector4f & dst,Float4 & lod,const Src & src1)1609 	void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1610 	{
1611 		bool uniformSampler = (src1.type == Shader::PARAMETER_SAMPLER && src1.rel.type == Shader::PARAMETER_VOID);
1612 		Int offset = uniformSampler ? src1.index * sizeof(Texture) : As<Int>(Float(fetchRegister(src1).x.x)) * sizeof(Texture);
1613 		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + offset;
1614 
1615 		dst = SamplerCore::textureSize(texture, lod);
1616 	}
1617 
sampleTexture(const Src & s,Vector4f & uvwq,Float4 & lod,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)1618 	Vector4f VertexProgram::sampleTexture(const Src &s, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
1619 	{
1620 		Vector4f tmp;
1621 
1622 		if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
1623 		{
1624 			tmp = sampleTexture(s.index, uvwq, lod, dsx, dsy, offset, function);
1625 		}
1626 		else
1627 		{
1628 			Int index = As<Int>(Float(fetchRegister(s).x.x));
1629 
1630 			for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
1631 			{
1632 				if(shader->usesSampler(i))
1633 				{
1634 					If(index == i)
1635 					{
1636 						tmp = sampleTexture(i, uvwq, lod, dsx, dsy, offset, function);
1637 						// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1638 					}
1639 				}
1640 			}
1641 		}
1642 
1643 		Vector4f c;
1644 		c.x = tmp[(s.swizzle >> 0) & 0x3];
1645 		c.y = tmp[(s.swizzle >> 2) & 0x3];
1646 		c.z = tmp[(s.swizzle >> 4) & 0x3];
1647 		c.w = tmp[(s.swizzle >> 6) & 0x3];
1648 
1649 		return c;
1650 	}
1651 
sampleTexture(int sampler,Vector4f & uvwq,Float4 & lod,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)1652 	Vector4f VertexProgram::sampleTexture(int sampler, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
1653 	{
1654 		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + sampler * sizeof(Texture);
1655 		return SamplerCore(constants, state.sampler[sampler]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, lod, dsx, dsy, offset, function);
1656 	}
1657 }
1658