1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Shader.hpp"
16 
17 #include "VertexShader.hpp"
18 #include "PixelShader.hpp"
19 #include "Math.hpp"
20 #include "Debug.hpp"
21 
#include <set>
#include <fstream>
#include <sstream>
#include <stdarg.h>
#include <string.h>
26 
27 namespace sw
28 {
	// Definition of the static Shader::serialCounter member; the first value is 1.
	// NOTE(review): presumably used to hand out per-shader serial numbers - confirm
	// against the users of this counter. 'volatile' by itself does not make
	// increments atomic.
	volatile int Shader::serialCounter = 1;
30 
OPCODE_DP(int i)31 	Shader::Opcode Shader::OPCODE_DP(int i)
32 	{
33 		switch(i)
34 		{
35 		default: ASSERT(false);
36 		case 1: return OPCODE_DP1;
37 		case 2: return OPCODE_DP2;
38 		case 3: return OPCODE_DP3;
39 		case 4: return OPCODE_DP4;
40 		}
41 	}
42 
OPCODE_LEN(int i)43 	Shader::Opcode Shader::OPCODE_LEN(int i)
44 	{
45 		switch(i)
46 		{
47 		default: ASSERT(false);
48 		case 1: return OPCODE_ABS;
49 		case 2: return OPCODE_LEN2;
50 		case 3: return OPCODE_LEN3;
51 		case 4: return OPCODE_LEN4;
52 		}
53 	}
54 
OPCODE_DIST(int i)55 	Shader::Opcode Shader::OPCODE_DIST(int i)
56 	{
57 		switch(i)
58 		{
59 		default: ASSERT(false);
60 		case 1: return OPCODE_DIST1;
61 		case 2: return OPCODE_DIST2;
62 		case 3: return OPCODE_DIST3;
63 		case 4: return OPCODE_DIST4;
64 		}
65 	}
66 
OPCODE_NRM(int i)67 	Shader::Opcode Shader::OPCODE_NRM(int i)
68 	{
69 		switch(i)
70 		{
71 		default: ASSERT(false);
72 		case 1: return OPCODE_SGN;
73 		case 2: return OPCODE_NRM2;
74 		case 3: return OPCODE_NRM3;
75 		case 4: return OPCODE_NRM4;
76 		}
77 	}
78 
OPCODE_FORWARD(int i)79 	Shader::Opcode Shader::OPCODE_FORWARD(int i)
80 	{
81 		switch(i)
82 		{
83 		default: ASSERT(false);
84 		case 1: return OPCODE_FORWARD1;
85 		case 2: return OPCODE_FORWARD2;
86 		case 3: return OPCODE_FORWARD3;
87 		case 4: return OPCODE_FORWARD4;
88 		}
89 	}
90 
OPCODE_REFLECT(int i)91 	Shader::Opcode Shader::OPCODE_REFLECT(int i)
92 	{
93 		switch(i)
94 		{
95 		default: ASSERT(false);
96 		case 1: return OPCODE_REFLECT1;
97 		case 2: return OPCODE_REFLECT2;
98 		case 3: return OPCODE_REFLECT3;
99 		case 4: return OPCODE_REFLECT4;
100 		}
101 	}
102 
OPCODE_REFRACT(int i)103 	Shader::Opcode Shader::OPCODE_REFRACT(int i)
104 	{
105 		switch(i)
106 		{
107 		default: ASSERT(false);
108 		case 1: return OPCODE_REFRACT1;
109 		case 2: return OPCODE_REFRACT2;
110 		case 3: return OPCODE_REFRACT3;
111 		case 4: return OPCODE_REFRACT4;
112 		}
113 	}
114 
Instruction(Opcode opcode)115 	Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
116 	{
117 		control = CONTROL_RESERVED0;
118 
119 		predicate = false;
120 		predicateNot = false;
121 		predicateSwizzle = 0xE4;
122 
123 		coissue = false;
124 		samplerType = SAMPLER_UNKNOWN;
125 		usage = USAGE_POSITION;
126 		usageIndex = 0;
127 	}
128 
Instruction(const unsigned long * token,int size,unsigned char majorVersion)129 	Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
130 	{
131 		parseOperationToken(*token++, majorVersion);
132 
133 		samplerType = SAMPLER_UNKNOWN;
134 		usage = USAGE_POSITION;
135 		usageIndex = 0;
136 
137 		if(opcode == OPCODE_IF ||
138 		   opcode == OPCODE_IFC ||
139 		   opcode == OPCODE_LOOP ||
140 		   opcode == OPCODE_REP ||
141 		   opcode == OPCODE_BREAKC ||
142 		   opcode == OPCODE_BREAKP)   // No destination operand
143 		{
144 			if(size > 0) parseSourceToken(0, token++, majorVersion);
145 			if(size > 1) parseSourceToken(1, token++, majorVersion);
146 			if(size > 2) parseSourceToken(2, token++, majorVersion);
147 			if(size > 3) ASSERT(false);
148 		}
149 		else if(opcode == OPCODE_DCL)
150 		{
151 			parseDeclarationToken(*token++);
152 			parseDestinationToken(token++, majorVersion);
153 		}
154 		else
155 		{
156 			if(size > 0)
157 			{
158 				parseDestinationToken(token, majorVersion);
159 
160 				if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
161 				{
162 					token++;
163 					size--;
164 				}
165 
166 				token++;
167 				size--;
168 			}
169 
170 			if(predicate)
171 			{
172 				ASSERT(size != 0);
173 
174 				predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
175 				predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
176 
177 				token++;
178 				size--;
179 			}
180 
181 			for(int i = 0; size > 0; i++)
182 			{
183 				parseSourceToken(i, token, majorVersion);
184 
185 				token++;
186 				size--;
187 
188 				if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
189 				{
190 					token++;
191 					size--;
192 				}
193 			}
194 		}
195 	}
196 
~Instruction()197 	Shader::Instruction::~Instruction()
198 	{
199 	}
200 
string(ShaderType shaderType,unsigned short version) const201 	std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
202 	{
203 		std::string instructionString;
204 
205 		if(opcode != OPCODE_DCL)
206 		{
207 			instructionString += coissue ? "+ " : "";
208 
209 			if(predicate)
210 			{
211 				instructionString += predicateNot ? "(!p0" : "(p0";
212 				instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
213 				instructionString += ") ";
214 			}
215 
216 			instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
217 
218 			if(dst.type != PARAMETER_VOID)
219 			{
220 				instructionString += " " + dst.string(shaderType, version) +
221 				                           dst.relativeString() +
222 				                           dst.maskString();
223 			}
224 
225 			for(int i = 0; i < 4; i++)
226 			{
227 				if(src[i].type != PARAMETER_VOID)
228 				{
229 					instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
230 					instructionString += src[i].preModifierString() +
231 										 src[i].string(shaderType, version) +
232 										 src[i].relativeString() +
233 										 src[i].postModifierString() +
234 										 src[i].swizzleString();
235 				}
236 			}
237 		}
238 		else   // DCL
239 		{
240 			instructionString += "dcl";
241 
242 			if(dst.type == PARAMETER_SAMPLER)
243 			{
244 				switch(samplerType)
245 				{
246 				case SAMPLER_UNKNOWN: instructionString += " ";        break;
247 				case SAMPLER_1D:      instructionString += "_1d ";     break;
248 				case SAMPLER_2D:      instructionString += "_2d ";     break;
249 				case SAMPLER_CUBE:    instructionString += "_cube ";   break;
250 				case SAMPLER_VOLUME:  instructionString += "_volume "; break;
251 				default:
252 					ASSERT(false);
253 				}
254 
255 				instructionString += dst.string(shaderType, version);
256 			}
257 			else if(dst.type == PARAMETER_INPUT ||
258 				    dst.type == PARAMETER_OUTPUT ||
259 				    dst.type == PARAMETER_TEXTURE)
260 			{
261 				if(version >= 0x0300)
262 				{
263 					switch(usage)
264 					{
265 					case USAGE_POSITION:     instructionString += "_position";     break;
266 					case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
267 					case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
268 					case USAGE_NORMAL:       instructionString += "_normal";       break;
269 					case USAGE_PSIZE:        instructionString += "_psize";        break;
270 					case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
271 					case USAGE_TANGENT:      instructionString += "_tangent";      break;
272 					case USAGE_BINORMAL:     instructionString += "_binormal";     break;
273 					case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
274 					case USAGE_POSITIONT:    instructionString += "_positiont";    break;
275 					case USAGE_COLOR:        instructionString += "_color";        break;
276 					case USAGE_FOG:          instructionString += "_fog";          break;
277 					case USAGE_DEPTH:        instructionString += "_depth";        break;
278 					case USAGE_SAMPLE:       instructionString += "_sample";       break;
279 					default:
280 						ASSERT(false);
281 					}
282 
283 					if(usageIndex > 0)
284 					{
285 						std::ostringstream buffer;
286 
287 						buffer << (int)usageIndex;
288 
289 						instructionString += buffer.str();
290 					}
291 				}
292 				else ASSERT(dst.type != PARAMETER_OUTPUT);
293 
294 				instructionString += " ";
295 
296 				instructionString += dst.string(shaderType, version);
297 				instructionString += dst.maskString();
298 			}
299 			else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
300 			{
301 				instructionString += " ";
302 
303 				instructionString += dst.string(shaderType, version);
304 			}
305 			else ASSERT(false);
306 		}
307 
308 		return instructionString;
309 	}
310 
modifierString() const311 	std::string Shader::DestinationParameter::modifierString() const
312 	{
313 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
314 		{
315 			return "";
316 		}
317 
318 		std::string modifierString;
319 
320 		if(integer)
321 		{
322 			modifierString += "_int";
323 		}
324 
325 		if(saturate)
326 		{
327 			modifierString += "_sat";
328 		}
329 
330 		if(partialPrecision)
331 		{
332 			modifierString += "_pp";
333 		}
334 
335 		if(centroid)
336 		{
337 			modifierString += "_centroid";
338 		}
339 
340 		return modifierString;
341 	}
342 
shiftString() const343 	std::string Shader::DestinationParameter::shiftString() const
344 	{
345 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
346 		{
347 			return "";
348 		}
349 
350 		switch(shift)
351 		{
352 		case 0:		return "";
353 		case 1:		return "_x2";
354 		case 2:		return "_x4";
355 		case 3:		return "_x8";
356 		case -1:	return "_d2";
357 		case -2:	return "_d4";
358 		case -3:	return "_d8";
359 		default:
360 			return "";
361 		//	ASSERT(false);   // FIXME
362 		}
363 	}
364 
maskString() const365 	std::string Shader::DestinationParameter::maskString() const
366 	{
367 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
368 		{
369 			return "";
370 		}
371 
372 		switch(mask)
373 		{
374 		case 0x0:	return "";
375 		case 0x1:	return ".x";
376 		case 0x2:	return ".y";
377 		case 0x3:	return ".xy";
378 		case 0x4:	return ".z";
379 		case 0x5:	return ".xz";
380 		case 0x6:	return ".yz";
381 		case 0x7:	return ".xyz";
382 		case 0x8:	return ".w";
383 		case 0x9:	return ".xw";
384 		case 0xA:	return ".yw";
385 		case 0xB:	return ".xyw";
386 		case 0xC:	return ".zw";
387 		case 0xD:	return ".xzw";
388 		case 0xE:	return ".yzw";
389 		case 0xF:	return "";
390 		default:
391 			ASSERT(false);
392 		}
393 
394 		return "";
395 	}
396 
preModifierString() const397 	std::string Shader::SourceParameter::preModifierString() const
398 	{
399 		if(type == PARAMETER_VOID)
400 		{
401 			return "";
402 		}
403 
404 		switch(modifier)
405 		{
406 		case MODIFIER_NONE:			return "";
407 		case MODIFIER_NEGATE:		return "-";
408 		case MODIFIER_BIAS:			return "";
409 		case MODIFIER_BIAS_NEGATE:	return "-";
410 		case MODIFIER_SIGN:			return "";
411 		case MODIFIER_SIGN_NEGATE:	return "-";
412 		case MODIFIER_COMPLEMENT:	return "1-";
413 		case MODIFIER_X2:			return "";
414 		case MODIFIER_X2_NEGATE:	return "-";
415 		case MODIFIER_DZ:			return "";
416 		case MODIFIER_DW:			return "";
417 		case MODIFIER_ABS:			return "";
418 		case MODIFIER_ABS_NEGATE:	return "-";
419 		case MODIFIER_NOT:			return "!";
420 		default:
421 			ASSERT(false);
422 		}
423 
424 		return "";
425 	}
426 
relativeString() const427 	std::string Shader::Parameter::relativeString() const
428 	{
429 		if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
430 		{
431 			if(rel.type == PARAMETER_VOID)
432 			{
433 				return "";
434 			}
435 			else if(rel.type == PARAMETER_ADDR)
436 			{
437 				switch(rel.swizzle & 0x03)
438 				{
439 				case 0: return "[a0.x]";
440 				case 1: return "[a0.y]";
441 				case 2: return "[a0.z]";
442 				case 3: return "[a0.w]";
443 				}
444 			}
445 			else if(rel.type == PARAMETER_TEMP)
446 			{
447 				std::ostringstream buffer;
448 				buffer << rel.index;
449 
450 				switch(rel.swizzle & 0x03)
451 				{
452 				case 0: return "[r" + buffer.str() + ".x]";
453 				case 1: return "[r" + buffer.str() + ".y]";
454 				case 2: return "[r" + buffer.str() + ".z]";
455 				case 3: return "[r" + buffer.str() + ".w]";
456 				}
457 			}
458 			else if(rel.type == PARAMETER_LOOP)
459 			{
460 				return "[aL]";
461 			}
462 			else if(rel.type == PARAMETER_CONST)
463 			{
464 				std::ostringstream buffer;
465 				buffer << rel.index;
466 
467 				switch(rel.swizzle & 0x03)
468 				{
469 				case 0: return "[c" + buffer.str() + ".x]";
470 				case 1: return "[c" + buffer.str() + ".y]";
471 				case 2: return "[c" + buffer.str() + ".z]";
472 				case 3: return "[c" + buffer.str() + ".w]";
473 				}
474 			}
475 			else ASSERT(false);
476 		}
477 
478 		return "";
479 	}
480 
postModifierString() const481 	std::string Shader::SourceParameter::postModifierString() const
482 	{
483 		if(type == PARAMETER_VOID)
484 		{
485 			return "";
486 		}
487 
488 		switch(modifier)
489 		{
490 		case MODIFIER_NONE:			return "";
491 		case MODIFIER_NEGATE:		return "";
492 		case MODIFIER_BIAS:			return "_bias";
493 		case MODIFIER_BIAS_NEGATE:	return "_bias";
494 		case MODIFIER_SIGN:			return "_bx2";
495 		case MODIFIER_SIGN_NEGATE:	return "_bx2";
496 		case MODIFIER_COMPLEMENT:	return "";
497 		case MODIFIER_X2:			return "_x2";
498 		case MODIFIER_X2_NEGATE:	return "_x2";
499 		case MODIFIER_DZ:			return "_dz";
500 		case MODIFIER_DW:			return "_dw";
501 		case MODIFIER_ABS:			return "_abs";
502 		case MODIFIER_ABS_NEGATE:	return "_abs";
503 		case MODIFIER_NOT:			return "";
504 		default:
505 			ASSERT(false);
506 		}
507 
508 		return "";
509 	}
510 
swizzleString() const511 	std::string Shader::SourceParameter::swizzleString() const
512 	{
513 		return Instruction::swizzleString(type, swizzle);
514 	}
515 
parseOperationToken(unsigned long token,unsigned char majorVersion)516 	void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
517 	{
518 		if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
519 		{
520 			opcode = (Opcode)token;
521 
522 			control = CONTROL_RESERVED0;
523 			predicate = false;
524 			coissue = false;
525 		}
526 		else
527 		{
528 			opcode = (Opcode)(token & 0x0000FFFF);
529 			control = (Control)((token & 0x00FF0000) >> 16);
530 
531 			int size = (token & 0x0F000000) >> 24;
532 
533 			predicate = (token & 0x10000000) != 0x00000000;
534 			coissue = (token & 0x40000000) != 0x00000000;
535 
536 			if(majorVersion < 2)
537 			{
538 				if(size != 0)
539 				{
540 					ASSERT(false);   // Reserved
541 				}
542 			}
543 
544 			if(majorVersion < 2)
545 			{
546 				if(predicate)
547 				{
548 					ASSERT(false);
549 				}
550 			}
551 
552 			if((token & 0x20000000) != 0x00000000)
553 			{
554 				ASSERT(false);   // Reserved
555 			}
556 
557 			if(majorVersion >= 2)
558 			{
559 				if(coissue)
560 				{
561 					ASSERT(false);   // Reserved
562 				}
563 			}
564 
565 			if((token & 0x80000000) != 0x00000000)
566 			{
567 				ASSERT(false);
568 			}
569 		}
570 	}
571 
parseDeclarationToken(unsigned long token)572 	void Shader::Instruction::parseDeclarationToken(unsigned long token)
573 	{
574 		samplerType = (SamplerType)((token & 0x78000000) >> 27);
575 		usage = (Usage)(token & 0x0000001F);
576 		usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
577 	}
578 
parseDestinationToken(const unsigned long * token,unsigned char majorVersion)579 	void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
580 	{
581 		dst.index = (unsigned short)(token[0] & 0x000007FF);
582 		dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
583 
584 		// TODO: Check type and index range
585 
586 		bool relative = (token[0] & 0x00002000) != 0x00000000;
587 		dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
588 		dst.rel.swizzle = 0x00;
589 		dst.rel.scale = 1;
590 
591 		if(relative && majorVersion >= 3)
592 		{
593 			dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
594 			dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
595 		}
596 		else if(relative) ASSERT(false);   // Reserved
597 
598 		if((token[0] & 0x0000C000) != 0x00000000)
599 		{
600 			ASSERT(false);   // Reserved
601 		}
602 
603 		dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
604 		dst.saturate = (token[0] & 0x00100000) != 0;
605 		dst.partialPrecision = (token[0] & 0x00200000) != 0;
606 		dst.centroid = (token[0] & 0x00400000) != 0;
607 		dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
608 
609 		if(majorVersion >= 2)
610 		{
611 			if(dst.shift)
612 			{
613 				ASSERT(false);   // Reserved
614 			}
615 		}
616 
617 		if((token[0] & 0x80000000) != 0x80000000)
618 		{
619 			ASSERT(false);
620 		}
621 	}
622 
parseSourceToken(int i,const unsigned long * token,unsigned char majorVersion)623 	void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
624 	{
625 		// Defaults
626 		src[i].index = 0;
627 		src[i].type = PARAMETER_VOID;
628 		src[i].modifier = MODIFIER_NONE;
629 		src[i].swizzle = 0xE4;
630 		src[i].rel.type = PARAMETER_VOID;
631 		src[i].rel.swizzle = 0x00;
632 		src[i].rel.scale = 1;
633 
634 		switch(opcode)
635 		{
636 		case OPCODE_DEF:
637 			src[0].type = PARAMETER_FLOAT4LITERAL;
638 			src[0].value[i] = *(float*)token;
639 			break;
640 		case OPCODE_DEFB:
641 			src[0].type = PARAMETER_BOOL1LITERAL;
642 			src[0].boolean[0] = *(int*)token;
643 			break;
644 		case OPCODE_DEFI:
645 			src[0].type = PARAMETER_INT4LITERAL;
646 			src[0].integer[i] = *(int*)token;
647 			break;
648 		default:
649 			src[i].index = (unsigned short)(token[0] & 0x000007FF);
650 			src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
651 
652 			// FIXME: Check type and index range
653 
654 			bool relative = (token[0] & 0x00002000) != 0x00000000;
655 			src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
656 
657 			if((token[0] & 0x0000C000) != 0x00000000)
658 			{
659 				if(opcode != OPCODE_DEF &&
660 				   opcode != OPCODE_DEFI &&
661 				   opcode != OPCODE_DEFB)
662 				{
663 					ASSERT(false);
664 				}
665 			}
666 
667 			src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
668 			src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
669 
670 			if((token[0] & 0x80000000) != 0x80000000)
671 			{
672 				if(opcode != OPCODE_DEF &&
673 				   opcode != OPCODE_DEFI &&
674 				   opcode != OPCODE_DEFB)
675 				{
676 					ASSERT(false);
677 				}
678 			}
679 
680 			if(relative && majorVersion >= 2)
681 			{
682 				src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
683 				src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
684 			}
685 		}
686 	}
687 
swizzleString(ParameterType type,unsigned char swizzle)688 	std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
689 	{
690 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
691 		{
692 			return "";
693 		}
694 
695 		int x = (swizzle & 0x03) >> 0;
696 		int y = (swizzle & 0x0C) >> 2;
697 		int z = (swizzle & 0x30) >> 4;
698 		int w = (swizzle & 0xC0) >> 6;
699 
700 		std::string swizzleString = ".";
701 
702 		switch(x)
703 		{
704 		case 0: swizzleString += "x"; break;
705 		case 1: swizzleString += "y"; break;
706 		case 2: swizzleString += "z"; break;
707 		case 3: swizzleString += "w"; break;
708 		}
709 
710 		if(!(x == y && y == z && z == w))
711 		{
712 			switch(y)
713 			{
714 			case 0: swizzleString += "x"; break;
715 			case 1: swizzleString += "y"; break;
716 			case 2: swizzleString += "z"; break;
717 			case 3: swizzleString += "w"; break;
718 			}
719 
720 			if(!(y == z && z == w))
721 			{
722 				switch(z)
723 				{
724 				case 0: swizzleString += "x"; break;
725 				case 1: swizzleString += "y"; break;
726 				case 2: swizzleString += "z"; break;
727 				case 3: swizzleString += "w"; break;
728 				}
729 
730 				if(!(z == w))
731 				{
732 					switch(w)
733 					{
734 					case 0: swizzleString += "x"; break;
735 					case 1: swizzleString += "y"; break;
736 					case 2: swizzleString += "z"; break;
737 					case 3: swizzleString += "w"; break;
738 					}
739 				}
740 			}
741 		}
742 
743 		return swizzleString;
744 	}
745 
	// Returns the textual mnemonic for this instruction's opcode.
	//
	// 'version' only influences OPCODE_TEX, which is printed as "tex" for
	// versions below 0x0104 and as "texld" otherwise. Opcodes without an entry
	// in the table are reported through ASSERT and print as "<unknown>".
	std::string Shader::Instruction::operationString(unsigned short version) const
	{
		switch(opcode)
		{
		case OPCODE_NULL:			return "null";
		case OPCODE_NOP:			return "nop";
		case OPCODE_MOV:			return "mov";
		case OPCODE_ADD:			return "add";
		case OPCODE_IADD:			return "iadd";
		case OPCODE_SUB:			return "sub";
		case OPCODE_ISUB:			return "isub";
		case OPCODE_MAD:			return "mad";
		case OPCODE_IMAD:			return "imad";
		case OPCODE_MUL:			return "mul";
		case OPCODE_IMUL:			return "imul";
		case OPCODE_RCPX:			return "rcpx";
		case OPCODE_DIV:			return "div";
		case OPCODE_IDIV:			return "idiv";
		case OPCODE_UDIV:			return "udiv";
		case OPCODE_MOD:			return "mod";
		case OPCODE_IMOD:			return "imod";
		case OPCODE_UMOD:			return "umod";
		case OPCODE_SHL:			return "shl";
		case OPCODE_ISHR:			return "ishr";
		case OPCODE_USHR:			return "ushr";
		case OPCODE_RSQX:			return "rsqx";
		case OPCODE_SQRT:			return "sqrt";
		case OPCODE_RSQ:			return "rsq";
		case OPCODE_LEN2:			return "len2";
		case OPCODE_LEN3:			return "len3";
		case OPCODE_LEN4:			return "len4";
		case OPCODE_DIST1:			return "dist1";
		case OPCODE_DIST2:			return "dist2";
		case OPCODE_DIST3:			return "dist3";
		case OPCODE_DIST4:			return "dist4";
		case OPCODE_DP3:			return "dp3";
		case OPCODE_DP4:			return "dp4";
		case OPCODE_DET2:			return "det2";
		case OPCODE_DET3:			return "det3";
		case OPCODE_DET4:			return "det4";
		case OPCODE_MIN:			return "min";
		case OPCODE_IMIN:			return "imin";
		case OPCODE_UMIN:			return "umin";
		case OPCODE_MAX:			return "max";
		case OPCODE_IMAX:			return "imax";
		case OPCODE_UMAX:			return "umax";
		case OPCODE_SLT:			return "slt";
		case OPCODE_SGE:			return "sge";
		case OPCODE_EXP2X:			return "exp2x";
		case OPCODE_LOG2X:			return "log2x";
		case OPCODE_LIT:			return "lit";
		case OPCODE_ATT:			return "att";
		case OPCODE_LRP:			return "lrp";
		case OPCODE_STEP:			return "step";
		case OPCODE_SMOOTH:			return "smooth";
		case OPCODE_FLOATBITSTOINT:	 return "floatBitsToInt";
		case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
		case OPCODE_INTBITSTOFLOAT:	 return "intBitsToFloat";
		case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
		case OPCODE_PACKSNORM2x16:	 return "packSnorm2x16";
		case OPCODE_PACKUNORM2x16:	 return "packUnorm2x16";
		case OPCODE_PACKHALF2x16:	 return "packHalf2x16";
		case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
		case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
		case OPCODE_UNPACKHALF2x16:	 return "unpackHalf2x16";
		case OPCODE_FRC:			return "frc";
		case OPCODE_M4X4:			return "m4x4";
		case OPCODE_M4X3:			return "m4x3";
		case OPCODE_M3X4:			return "m3x4";
		case OPCODE_M3X3:			return "m3x3";
		case OPCODE_M3X2:			return "m3x2";
		case OPCODE_CALL:			return "call";
		case OPCODE_CALLNZ:			return "callnz";
		case OPCODE_LOOP:			return "loop";
		case OPCODE_RET:			return "ret";
		case OPCODE_ENDLOOP:		return "endloop";
		case OPCODE_LABEL:			return "label";
		case OPCODE_DCL:			return "dcl";
		case OPCODE_POWX:			return "powx";
		case OPCODE_CRS:			return "crs";
		case OPCODE_SGN:			return "sgn";
		case OPCODE_ISGN:			return "isgn";
		case OPCODE_ABS:			return "abs";
		case OPCODE_IABS:			return "iabs";
		case OPCODE_NRM2:			return "nrm2";
		case OPCODE_NRM3:			return "nrm3";
		case OPCODE_NRM4:			return "nrm4";
		case OPCODE_SINCOS:			return "sincos";
		case OPCODE_REP:			return "rep";
		case OPCODE_ENDREP:			return "endrep";
		case OPCODE_IF:				return "if";
		case OPCODE_IFC:			return "ifc";
		case OPCODE_ELSE:			return "else";
		case OPCODE_ENDIF:			return "endif";
		case OPCODE_BREAK:			return "break";
		case OPCODE_BREAKC:			return "breakc";
		case OPCODE_MOVA:			return "mova";
		case OPCODE_DEFB:			return "defb";
		case OPCODE_DEFI:			return "defi";
		case OPCODE_TEXCOORD:		return "texcoord";
		case OPCODE_TEXKILL:		return "texkill";
		case OPCODE_DISCARD:		return "discard";
		case OPCODE_TEX:
			// The mnemonic for this opcode depends on the shader version
			if(version < 0x0104)	return "tex";
			else					return "texld";
		case OPCODE_TEXBEM:			return "texbem";
		case OPCODE_TEXBEML:		return "texbeml";
		case OPCODE_TEXREG2AR:		return "texreg2ar";
		case OPCODE_TEXREG2GB:		return "texreg2gb";
		case OPCODE_TEXM3X2PAD:		return "texm3x2pad";
		case OPCODE_TEXM3X2TEX:		return "texm3x2tex";
		case OPCODE_TEXM3X3PAD:		return "texm3x3pad";
		case OPCODE_TEXM3X3TEX:		return "texm3x3tex";
		case OPCODE_RESERVED0:		return "reserved0";
		case OPCODE_TEXM3X3SPEC:	return "texm3x3spec";
		case OPCODE_TEXM3X3VSPEC:	return "texm3x3vspec";
		case OPCODE_EXPP:			return "expp";
		case OPCODE_LOGP:			return "logp";
		case OPCODE_CND:			return "cnd";
		case OPCODE_DEF:			return "def";
		case OPCODE_TEXREG2RGB:		return "texreg2rgb";
		case OPCODE_TEXDP3TEX:		return "texdp3tex";
		case OPCODE_TEXM3X2DEPTH:	return "texm3x2depth";
		case OPCODE_TEXDP3:			return "texdp3";
		case OPCODE_TEXM3X3:		return "texm3x3";
		case OPCODE_TEXDEPTH:		return "texdepth";
		case OPCODE_CMP0:			return "cmp0";
		case OPCODE_ICMP:			return "icmp";
		case OPCODE_UCMP:			return "ucmp";
		case OPCODE_SELECT:			return "select";
		case OPCODE_EXTRACT:		return "extract";
		case OPCODE_INSERT:			return "insert";
		case OPCODE_BEM:			return "bem";
		case OPCODE_DP2ADD:			return "dp2add";
		case OPCODE_DFDX:			return "dFdx";
		case OPCODE_DFDY:			return "dFdy";
		case OPCODE_FWIDTH:			return "fwidth";
		case OPCODE_TEXLDD:			return "texldd";
		case OPCODE_CMP:			return "cmp";
		case OPCODE_TEXLDL:			return "texldl";
		case OPCODE_TEXOFFSET:		return "texoffset";
		case OPCODE_TEXLDLOFFSET:	return "texldloffset";
		case OPCODE_TEXELFETCH:		return "texelfetch";
		case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
		case OPCODE_TEXGRAD:		return "texgrad";
		case OPCODE_TEXGRADOFFSET:	return "texgradoffset";
		case OPCODE_BREAKP:			return "breakp";
		case OPCODE_TEXSIZE:        return "texsize";
		case OPCODE_PHASE:			return "phase";
		case OPCODE_COMMENT:		return "comment";
		case OPCODE_END:			return "end";
		case OPCODE_PS_1_0:			return "ps_1_0";
		case OPCODE_PS_1_1:			return "ps_1_1";
		case OPCODE_PS_1_2:			return "ps_1_2";
		case OPCODE_PS_1_3:			return "ps_1_3";
		case OPCODE_PS_1_4:			return "ps_1_4";
		case OPCODE_PS_2_0:			return "ps_2_0";
		case OPCODE_PS_2_x:			return "ps_2_x";
		case OPCODE_PS_3_0:			return "ps_3_0";
		case OPCODE_VS_1_0:			return "vs_1_0";
		case OPCODE_VS_1_1:			return "vs_1_1";
		case OPCODE_VS_2_0:			return "vs_2_0";
		case OPCODE_VS_2_x:			return "vs_2_x";
		case OPCODE_VS_2_sw:		return "vs_2_sw";
		case OPCODE_VS_3_0:			return "vs_3_0";
		case OPCODE_VS_3_sw:		return "vs_3_sw";
		case OPCODE_WHILE:          return "while";
		case OPCODE_ENDWHILE:       return "endwhile";
		case OPCODE_COS:            return "cos";
		case OPCODE_SIN:            return "sin";
		case OPCODE_TAN:            return "tan";
		case OPCODE_ACOS:           return "acos";
		case OPCODE_ASIN:           return "asin";
		case OPCODE_ATAN:           return "atan";
		case OPCODE_ATAN2:          return "atan2";
		case OPCODE_COSH:           return "cosh";
		case OPCODE_SINH:           return "sinh";
		case OPCODE_TANH:           return "tanh";
		case OPCODE_ACOSH:          return "acosh";
		case OPCODE_ASINH:          return "asinh";
		case OPCODE_ATANH:          return "atanh";
		case OPCODE_DP1:            return "dp1";
		case OPCODE_DP2:            return "dp2";
		case OPCODE_TRUNC:          return "trunc";
		case OPCODE_FLOOR:          return "floor";
		case OPCODE_ROUND:          return "round";
		case OPCODE_ROUNDEVEN:      return "roundEven";
		case OPCODE_CEIL:           return "ceil";
		case OPCODE_EXP2:           return "exp2";
		case OPCODE_LOG2:           return "log2";
		case OPCODE_EXP:            return "exp";
		case OPCODE_LOG:            return "log";
		case OPCODE_POW:            return "pow";
		case OPCODE_F2B:            return "f2b";
		case OPCODE_B2F:            return "b2f";
		case OPCODE_F2I:            return "f2i";
		case OPCODE_I2F:            return "i2f";
		case OPCODE_F2U:            return "f2u";
		case OPCODE_U2F:            return "u2f";
		case OPCODE_B2I:            return "b2i";
		case OPCODE_I2B:            return "i2b";
		case OPCODE_ALL:            return "all";
		case OPCODE_ANY:            return "any";
		case OPCODE_NEG:            return "neg";
		case OPCODE_INEG:           return "ineg";
		case OPCODE_ISNAN:          return "isnan";
		case OPCODE_ISINF:          return "isinf";
		case OPCODE_NOT:            return "not";
		case OPCODE_OR:             return "or";
		case OPCODE_XOR:            return "xor";
		case OPCODE_AND:            return "and";
		case OPCODE_EQ:             return "eq";
		case OPCODE_NE:             return "neq";
		case OPCODE_FORWARD1:       return "forward1";
		case OPCODE_FORWARD2:       return "forward2";
		case OPCODE_FORWARD3:       return "forward3";
		case OPCODE_FORWARD4:       return "forward4";
		case OPCODE_REFLECT1:       return "reflect1";
		case OPCODE_REFLECT2:       return "reflect2";
		case OPCODE_REFLECT3:       return "reflect3";
		case OPCODE_REFLECT4:       return "reflect4";
		case OPCODE_REFRACT1:       return "refract1";
		case OPCODE_REFRACT2:       return "refract2";
		case OPCODE_REFRACT3:       return "refract3";
		case OPCODE_REFRACT4:       return "refract4";
		case OPCODE_LEAVE:          return "leave";
		case OPCODE_CONTINUE:       return "continue";
		case OPCODE_TEST:           return "test";
		case OPCODE_SWITCH:         return "switch";
		case OPCODE_ENDSWITCH:      return "endswitch";
		default:
			ASSERT(false);
		}

		// Only reached for an opcode without a table entry, after the assert above
		return "<unknown>";
	}
982 
controlString() const983 	std::string Shader::Instruction::controlString() const
984 	{
985 		if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
986 		{
987 			if(project) return "p";
988 
989 			if(bias) return "b";
990 
991 			// FIXME: LOD
992 		}
993 
994 		switch(control)
995 		{
996 		case 1: return "_gt";
997 		case 2: return "_eq";
998 		case 3: return "_ge";
999 		case 4: return "_lt";
1000 		case 5: return "_ne";
1001 		case 6: return "_le";
1002 		default:
1003 			return "";
1004 		//	ASSERT(false);   // FIXME
1005 		}
1006 	}
1007 
string(ShaderType shaderType,unsigned short version) const1008 	std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
1009 	{
1010 		std::ostringstream buffer;
1011 
1012 		if(type == PARAMETER_FLOAT4LITERAL)
1013 		{
1014 			buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
1015 
1016 			return buffer.str();
1017 		}
1018 		else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
1019 		{
1020 			buffer << index;
1021 
1022 			return typeString(shaderType, version) + buffer.str();
1023 		}
1024 		else
1025 		{
1026 			return typeString(shaderType, version);
1027 		}
1028 	}
1029 
typeString(ShaderType shaderType,unsigned short version) const1030 	std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1031 	{
1032 		switch(type)
1033 		{
1034 		case PARAMETER_TEMP:			return "r";
1035 		case PARAMETER_INPUT:			return "v";
1036 		case PARAMETER_CONST:			return "c";
1037 		case PARAMETER_TEXTURE:
1038 	//	case PARAMETER_ADDR:
1039 			if(shaderType == SHADER_PIXEL)	return "t";
1040 			else							return "a0";
1041 		case PARAMETER_RASTOUT:
1042 			if(index == 0)              return "oPos";
1043 			else if(index == 1)         return "oFog";
1044 			else if(index == 2)         return "oPts";
1045 			else                        ASSERT(false);
1046 		case PARAMETER_ATTROUT:			return "oD";
1047 		case PARAMETER_TEXCRDOUT:
1048 	//	case PARAMETER_OUTPUT:			return "";
1049 			if(version < 0x0300)		return "oT";
1050 			else						return "o";
1051 		case PARAMETER_CONSTINT:		return "i";
1052 		case PARAMETER_COLOROUT:		return "oC";
1053 		case PARAMETER_DEPTHOUT:		return "oDepth";
1054 		case PARAMETER_SAMPLER:			return "s";
1055 	//	case PARAMETER_CONST2:			return "";
1056 	//	case PARAMETER_CONST3:			return "";
1057 	//	case PARAMETER_CONST4:			return "";
1058 		case PARAMETER_CONSTBOOL:		return "b";
1059 		case PARAMETER_LOOP:			return "aL";
1060 	//	case PARAMETER_TEMPFLOAT16:		return "";
1061 		case PARAMETER_MISCTYPE:
1062 			if(index == 0)				return "vPos";
1063 			else if(index == 1)			return "vFace";
1064 			else						ASSERT(false);
1065 		case PARAMETER_LABEL:			return "l";
1066 		case PARAMETER_PREDICATE:		return "p0";
1067 		case PARAMETER_FLOAT4LITERAL:	return "";
1068 		case PARAMETER_BOOL1LITERAL:	return "";
1069 		case PARAMETER_INT4LITERAL:		return "";
1070 	//	case PARAMETER_VOID:			return "";
1071 		default:
1072 			ASSERT(false);
1073 		}
1074 
1075 		return "";
1076 	}
1077 
isBranch() const1078 	bool Shader::Instruction::isBranch() const
1079 	{
1080 		return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1081 	}
1082 
isCall() const1083 	bool Shader::Instruction::isCall() const
1084 	{
1085 		return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1086 	}
1087 
isBreak() const1088 	bool Shader::Instruction::isBreak() const
1089 	{
1090 		return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1091 	}
1092 
isLoopOrSwitch() const1093 	bool Shader::Instruction::isLoopOrSwitch() const
1094 	{
1095 		return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE || opcode == OPCODE_SWITCH;
1096 	}
1097 
isEndLoopOrSwitch() const1098 	bool Shader::Instruction::isEndLoopOrSwitch() const
1099 	{
1100 		return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE || opcode == OPCODE_ENDSWITCH;;
1101 	}
1102 
isPredicated() const1103 	bool Shader::Instruction::isPredicated() const
1104 	{
1105 		return predicate ||
1106 		       analysisBranch ||
1107 		       analysisBreak ||
1108 		       analysisContinue ||
1109 		       analysisLeave;
1110 	}
1111 
Shader()1112 	Shader::Shader() : serialID(serialCounter++)
1113 	{
1114 		usedSamplers = 0;
1115 	}
1116 
~Shader()1117 	Shader::~Shader()
1118 	{
1119 		for(unsigned int i = 0; i < instruction.size(); i++)
1120 		{
1121 			delete instruction[i];
1122 			instruction[i] = 0;
1123 		}
1124 	}
1125 
parse(const unsigned long * token)1126 	void Shader::parse(const unsigned long *token)
1127 	{
1128 		minorVersion = (unsigned char)(token[0] & 0x000000FF);
1129 		majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1130 		shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1131 
1132 		int length = 0;
1133 
1134 		if(shaderType == SHADER_VERTEX)
1135 		{
1136 			length = VertexShader::validate(token);
1137 		}
1138 		else if(shaderType == SHADER_PIXEL)
1139 		{
1140 			length = PixelShader::validate(token);
1141 		}
1142 		else ASSERT(false);
1143 
1144 		ASSERT(length != 0);
1145 		instruction.resize(length);
1146 
1147 		for(int i = 0; i < length; i++)
1148 		{
1149 			while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
1150 			{
1151 				int length = (*token & 0x7FFF0000) >> 16;
1152 
1153 				token += length + 1;
1154 			}
1155 
1156 			int tokenCount = size(*token);
1157 
1158 			instruction[i] = new Instruction(token, tokenCount, majorVersion);
1159 
1160 			token += 1 + tokenCount;
1161 		}
1162 	}
1163 
size(unsigned long opcode) const1164 	int Shader::size(unsigned long opcode) const
1165 	{
1166 		return size(opcode, version);
1167 	}
1168 
size(unsigned long opcode,unsigned short version)1169 	int Shader::size(unsigned long opcode, unsigned short version)
1170 	{
1171 		if(version > 0x0300)
1172 		{
1173 			ASSERT(false);
1174 		}
1175 
1176 		static const char size[] =
1177 		{
1178 			0,   // NOP = 0
1179 			2,   // MOV
1180 			3,   // ADD
1181 			3,   // SUB
1182 			4,   // MAD
1183 			3,   // MUL
1184 			2,   // RCP
1185 			2,   // RSQ
1186 			3,   // DP3
1187 			3,   // DP4
1188 			3,   // MIN
1189 			3,   // MAX
1190 			3,   // SLT
1191 			3,   // SGE
1192 			2,   // EXP
1193 			2,   // LOG
1194 			2,   // LIT
1195 			3,   // DST
1196 			4,   // LRP
1197 			2,   // FRC
1198 			3,   // M4x4
1199 			3,   // M4x3
1200 			3,   // M3x4
1201 			3,   // M3x3
1202 			3,   // M3x2
1203 			1,   // CALL
1204 			2,   // CALLNZ
1205 			2,   // LOOP
1206 			0,   // RET
1207 			0,   // ENDLOOP
1208 			1,   // LABEL
1209 			2,   // DCL
1210 			3,   // POW
1211 			3,   // CRS
1212 			4,   // SGN
1213 			2,   // ABS
1214 			2,   // NRM
1215 			4,   // SINCOS
1216 			1,   // REP
1217 			0,   // ENDREP
1218 			1,   // IF
1219 			2,   // IFC
1220 			0,   // ELSE
1221 			0,   // ENDIF
1222 			0,   // BREAK
1223 			2,   // BREAKC
1224 			2,   // MOVA
1225 			2,   // DEFB
1226 			5,   // DEFI
1227 			-1,  // 49
1228 			-1,  // 50
1229 			-1,  // 51
1230 			-1,  // 52
1231 			-1,  // 53
1232 			-1,  // 54
1233 			-1,  // 55
1234 			-1,  // 56
1235 			-1,  // 57
1236 			-1,  // 58
1237 			-1,  // 59
1238 			-1,  // 60
1239 			-1,  // 61
1240 			-1,  // 62
1241 			-1,  // 63
1242 			1,   // TEXCOORD = 64
1243 			1,   // TEXKILL
1244 			1,   // TEX
1245 			2,   // TEXBEM
1246 			2,   // TEXBEML
1247 			2,   // TEXREG2AR
1248 			2,   // TEXREG2GB
1249 			2,   // TEXM3x2PAD
1250 			2,   // TEXM3x2TEX
1251 			2,   // TEXM3x3PAD
1252 			2,   // TEXM3x3TEX
1253 			-1,  // RESERVED0
1254 			3,   // TEXM3x3SPEC
1255 			2,   // TEXM3x3VSPEC
1256 			2,   // EXPP
1257 			2,   // LOGP
1258 			4,   // CND
1259 			5,   // DEF
1260 			2,   // TEXREG2RGB
1261 			2,   // TEXDP3TEX
1262 			2,   // TEXM3x2DEPTH
1263 			2,   // TEXDP3
1264 			2,   // TEXM3x3
1265 			1,   // TEXDEPTH
1266 			4,   // CMP
1267 			3,   // BEM
1268 			4,   // DP2ADD
1269 			2,   // DSX
1270 			2,   // DSY
1271 			5,   // TEXLDD
1272 			3,   // SETP
1273 			3,   // TEXLDL
1274 			2,   // BREAKP
1275 			-1,  // 97
1276 			-1,  // 98
1277 			-1,  // 99
1278 			-1,  // 100
1279 			-1,  // 101
1280 			-1,  // 102
1281 			-1,  // 103
1282 			-1,  // 104
1283 			-1,  // 105
1284 			-1,  // 106
1285 			-1,  // 107
1286 			-1,  // 108
1287 			-1,  // 109
1288 			-1,  // 110
1289 			-1,  // 111
1290 			-1,  // 112
1291 		};
1292 
1293 		int length = 0;
1294 
1295 		if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1296 		{
1297 			return (opcode & 0x7FFF0000) >> 16;
1298 		}
1299 
1300 		if(opcode != OPCODE_PS_1_0 &&
1301 		   opcode != OPCODE_PS_1_1 &&
1302 		   opcode != OPCODE_PS_1_2 &&
1303 		   opcode != OPCODE_PS_1_3 &&
1304 		   opcode != OPCODE_PS_1_4 &&
1305 		   opcode != OPCODE_PS_2_0 &&
1306 		   opcode != OPCODE_PS_2_x &&
1307 		   opcode != OPCODE_PS_3_0 &&
1308 		   opcode != OPCODE_VS_1_0 &&
1309 		   opcode != OPCODE_VS_1_1 &&
1310 		   opcode != OPCODE_VS_2_0 &&
1311 		   opcode != OPCODE_VS_2_x &&
1312 		   opcode != OPCODE_VS_2_sw &&
1313 		   opcode != OPCODE_VS_3_0 &&
1314 		   opcode != OPCODE_VS_3_sw &&
1315 		   opcode != OPCODE_PHASE &&
1316 		   opcode != OPCODE_END)
1317 		{
1318 			if(version >= 0x0200)
1319 			{
1320 				length = (opcode & 0x0F000000) >> 24;
1321 			}
1322 			else
1323 			{
1324 				length = size[opcode & 0x0000FFFF];
1325 			}
1326 		}
1327 
1328 		if(length < 0)
1329 		{
1330 			ASSERT(false);
1331 		}
1332 
1333 		if(version == 0x0104)
1334 		{
1335 			switch(opcode & 0x0000FFFF)
1336 			{
1337 			case OPCODE_TEX:
1338 				length += 1;
1339 				break;
1340 			case OPCODE_TEXCOORD:
1341 				length += 1;
1342 				break;
1343 			default:
1344 				break;
1345 			}
1346 		}
1347 
1348 		return length;
1349 	}
1350 
maskContainsComponent(int mask,int component)1351 	bool Shader::maskContainsComponent(int mask, int component)
1352 	{
1353 		return (mask & (1 << component)) != 0;
1354 	}
1355 
swizzleContainsComponent(int swizzle,int component)1356 	bool Shader::swizzleContainsComponent(int swizzle, int component)
1357 	{
1358 		if((swizzle & 0x03) >> 0 == component) return true;
1359 		if((swizzle & 0x0C) >> 2 == component) return true;
1360 		if((swizzle & 0x30) >> 4 == component) return true;
1361 		if((swizzle & 0xC0) >> 6 == component) return true;
1362 
1363 		return false;
1364 	}
1365 
swizzleContainsComponentMasked(int swizzle,int component,int mask)1366 	bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1367 	{
1368 		if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1369 		if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1370 		if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1371 		if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1372 
1373 		return false;
1374 	}
1375 
containsDynamicBranching() const1376 	bool Shader::containsDynamicBranching() const
1377 	{
1378 		return dynamicBranching;
1379 	}
1380 
containsBreakInstruction() const1381 	bool Shader::containsBreakInstruction() const
1382 	{
1383 		return containsBreak;
1384 	}
1385 
containsContinueInstruction() const1386 	bool Shader::containsContinueInstruction() const
1387 	{
1388 		return containsContinue;
1389 	}
1390 
containsLeaveInstruction() const1391 	bool Shader::containsLeaveInstruction() const
1392 	{
1393 		return containsLeave;
1394 	}
1395 
containsDefineInstruction() const1396 	bool Shader::containsDefineInstruction() const
1397 	{
1398 		return containsDefine;
1399 	}
1400 
usesSampler(int index) const1401 	bool Shader::usesSampler(int index) const
1402 	{
1403 		return (usedSamplers & (1 << index)) != 0;
1404 	}
1405 
getSerialID() const1406 	int Shader::getSerialID() const
1407 	{
1408 		return serialID;
1409 	}
1410 
getLength() const1411 	size_t Shader::getLength() const
1412 	{
1413 		return instruction.size();
1414 	}
1415 
getShaderType() const1416 	Shader::ShaderType Shader::getShaderType() const
1417 	{
1418 		return shaderType;
1419 	}
1420 
getVersion() const1421 	unsigned short Shader::getVersion() const
1422 	{
1423 		return version;
1424 	}
1425 
print(const char * fileName,...) const1426 	void Shader::print(const char *fileName, ...) const
1427 	{
1428 		char fullName[1024 + 1];
1429 
1430 		va_list vararg;
1431 		va_start(vararg, fileName);
1432 		vsnprintf(fullName, 1024, fileName, vararg);
1433 		va_end(vararg);
1434 
1435 		std::ofstream file(fullName, std::ofstream::out);
1436 
1437 		for(unsigned int i = 0; i < instruction.size(); i++)
1438 		{
1439 			file << instruction[i]->string(shaderType, version) << std::endl;
1440 		}
1441 	}
1442 
printInstruction(int index,const char * fileName) const1443 	void Shader::printInstruction(int index, const char *fileName) const
1444 	{
1445 		std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1446 
1447 		file << instruction[index]->string(shaderType, version) << std::endl;
1448 	}
1449 
append(Instruction * instruction)1450 	void Shader::append(Instruction *instruction)
1451 	{
1452 		this->instruction.push_back(instruction);
1453 	}
1454 
declareSampler(int i)1455 	void Shader::declareSampler(int i)
1456 	{
1457 		usedSamplers |= 1 << i;
1458 	}
1459 
getInstruction(unsigned int i) const1460 	const Shader::Instruction *Shader::getInstruction(unsigned int i) const
1461 	{
1462 		ASSERT(i < instruction.size());
1463 
1464 		return instruction[i];
1465 	}
1466 
optimize()1467 	void Shader::optimize()
1468 	{
1469 		optimizeLeave();
1470 		optimizeCall();
1471 		removeNull();
1472 	}
1473 
optimizeLeave()1474 	void Shader::optimizeLeave()
1475 	{
1476 		// A return (leave) right before the end of a function or the shader can be removed
1477 		for(unsigned int i = 0; i < instruction.size(); i++)
1478 		{
1479 			if(instruction[i]->opcode == OPCODE_LEAVE)
1480 			{
1481 				if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1482 				{
1483 					instruction[i]->opcode = OPCODE_NULL;
1484 				}
1485 			}
1486 		}
1487 	}
1488 
optimizeCall()1489 	void Shader::optimizeCall()
1490 	{
1491 		// Eliminate uncalled functions
1492 		std::set<int> calledFunctions;
1493 		bool rescan = true;
1494 
1495 		while(rescan)
1496 		{
1497 			calledFunctions.clear();
1498 			rescan = false;
1499 
1500 			for(unsigned int i = 0; i < instruction.size(); i++)
1501 			{
1502 				if(instruction[i]->isCall())
1503 				{
1504 					calledFunctions.insert(instruction[i]->dst.label);
1505 				}
1506 			}
1507 
1508 			if(!calledFunctions.empty())
1509 			{
1510 				for(unsigned int i = 0; i < instruction.size(); i++)
1511 				{
1512 					if(instruction[i]->opcode == OPCODE_LABEL)
1513 					{
1514 						if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1515 						{
1516 							for( ; i < instruction.size(); i++)
1517 							{
1518 								Opcode oldOpcode = instruction[i]->opcode;
1519 								instruction[i]->opcode = OPCODE_NULL;
1520 
1521 								if(oldOpcode == OPCODE_RET)
1522 								{
1523 									rescan = true;
1524 									break;
1525 								}
1526 							}
1527 						}
1528 					}
1529 				}
1530 			}
1531 		}
1532 
1533 		// Optimize the entry call
1534 		if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1535 		{
1536 			if(calledFunctions.size() == 1)
1537 			{
1538 				instruction[0]->opcode = OPCODE_NULL;
1539 				instruction[1]->opcode = OPCODE_NULL;
1540 
1541 				for(size_t i = 2; i < instruction.size(); i++)
1542 				{
1543 					if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1544 					{
1545 						instruction[i]->opcode = OPCODE_NULL;
1546 					}
1547 				}
1548 			}
1549 		}
1550 	}
1551 
removeNull()1552 	void Shader::removeNull()
1553 	{
1554 		size_t size = 0;
1555 		for(size_t i = 0; i < instruction.size(); i++)
1556 		{
1557 			if(instruction[i]->opcode != OPCODE_NULL)
1558 			{
1559 				instruction[size] = instruction[i];
1560 				size++;
1561 			}
1562 			else
1563 			{
1564 				delete instruction[i];
1565 			}
1566 		}
1567 
1568 		instruction.resize(size);
1569 	}
1570 
analyzeDirtyConstants()1571 	void Shader::analyzeDirtyConstants()
1572 	{
1573 		dirtyConstantsF = 0;
1574 		dirtyConstantsI = 0;
1575 		dirtyConstantsB = 0;
1576 
1577 		for(unsigned int i = 0; i < instruction.size(); i++)
1578 		{
1579 			switch(instruction[i]->opcode)
1580 			{
1581 			case OPCODE_DEF:
1582 				if(instruction[i]->dst.index + 1 > dirtyConstantsF)
1583 				{
1584 					dirtyConstantsF = instruction[i]->dst.index + 1;
1585 				}
1586 				break;
1587 			case OPCODE_DEFI:
1588 				if(instruction[i]->dst.index + 1 > dirtyConstantsI)
1589 				{
1590 					dirtyConstantsI = instruction[i]->dst.index + 1;
1591 				}
1592 				break;
1593 			case OPCODE_DEFB:
1594 				if(instruction[i]->dst.index + 1 > dirtyConstantsB)
1595 				{
1596 					dirtyConstantsB = instruction[i]->dst.index + 1;
1597 				}
1598 				break;
1599 			default:
1600 				break;
1601 			}
1602 		}
1603 	}
1604 
analyzeDynamicBranching()1605 	void Shader::analyzeDynamicBranching()
1606 	{
1607 		dynamicBranching = false;
1608 		containsLeave = false;
1609 		containsBreak = false;
1610 		containsContinue = false;
1611 		containsDefine = false;
1612 
1613 		// Determine global presence of branching instructions
1614 		for(unsigned int i = 0; i < instruction.size(); i++)
1615 		{
1616 			switch(instruction[i]->opcode)
1617 			{
1618 			case OPCODE_CALLNZ:
1619 			case OPCODE_IF:
1620 			case OPCODE_IFC:
1621 			case OPCODE_BREAK:
1622 			case OPCODE_BREAKC:
1623 			case OPCODE_CMP:
1624 			case OPCODE_BREAKP:
1625 			case OPCODE_LEAVE:
1626 			case OPCODE_CONTINUE:
1627 				if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL)
1628 				{
1629 					dynamicBranching = true;
1630 				}
1631 
1632 				if(instruction[i]->opcode == OPCODE_LEAVE)
1633 				{
1634 					containsLeave = true;
1635 				}
1636 
1637 				if(instruction[i]->isBreak())
1638 				{
1639 					containsBreak = true;
1640 				}
1641 
1642 				if(instruction[i]->opcode == OPCODE_CONTINUE)
1643 				{
1644 					containsContinue = true;
1645 				}
1646 			case OPCODE_DEF:
1647 			case OPCODE_DEFB:
1648 			case OPCODE_DEFI:
1649 				containsDefine = true;
1650 			default:
1651 				break;
1652 			}
1653 		}
1654 
1655 		// Conservatively determine which instructions are affected by dynamic branching
1656 		int branchDepth = 0;
1657 		int breakDepth = 0;
1658 		int continueDepth = 0;
1659 		bool leaveReturn = false;
1660 
1661 		for(unsigned int i = 0; i < instruction.size(); i++)
1662 		{
1663 			// If statements
1664 			if(instruction[i]->isBranch())
1665 			{
1666 				branchDepth++;
1667 			}
1668 			else if(instruction[i]->opcode == OPCODE_ENDIF)
1669 			{
1670 				branchDepth--;
1671 			}
1672 
1673 			if(branchDepth > 0)
1674 			{
1675 				instruction[i]->analysisBranch = true;
1676 
1677 				if(instruction[i]->isCall())
1678 				{
1679 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1680 				}
1681 			}
1682 
1683 			// Break statemement
1684 			if(instruction[i]->isBreak())
1685 			{
1686 				breakDepth++;
1687 			}
1688 
1689 			if(breakDepth > 0)
1690 			{
1691 				if(instruction[i]->isLoopOrSwitch())   // Nested loop or switch, don't make the end of it disable the break execution mask
1692 				{
1693 					breakDepth++;
1694 				}
1695 				else if(instruction[i]->isEndLoopOrSwitch())
1696 				{
1697 					breakDepth--;
1698 				}
1699 
1700 				instruction[i]->analysisBreak = true;
1701 
1702 				if(instruction[i]->isCall())
1703 				{
1704 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1705 				}
1706 			}
1707 
1708 			// Continue statement
1709 			if(instruction[i]->opcode == OPCODE_CONTINUE)
1710 			{
1711 				continueDepth++;
1712 			}
1713 
1714 			if(continueDepth > 0)
1715 			{
1716 				if(instruction[i]->isLoopOrSwitch())   // Nested loop or switch, don't make the end of it disable the break execution mask
1717 				{
1718 					continueDepth++;
1719 				}
1720 				else if(instruction[i]->isEndLoopOrSwitch())
1721 				{
1722 					continueDepth--;
1723 				}
1724 
1725 				instruction[i]->analysisContinue = true;
1726 
1727 				if(instruction[i]->isCall())
1728 				{
1729 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1730 				}
1731 			}
1732 
1733 			// Return (leave) statement
1734 			if(instruction[i]->opcode == OPCODE_LEAVE)
1735 			{
1736 				leaveReturn = true;
1737 			}
1738 			else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
1739 			{
1740 				leaveReturn = false;
1741 			}
1742 
1743 			if(leaveReturn)
1744 			{
1745 				instruction[i]->analysisLeave = true;
1746 
1747 				if(instruction[i]->isCall())
1748 				{
1749 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1750 				}
1751 			}
1752 		}
1753 	}
1754 
markFunctionAnalysis(unsigned int functionLabel,Analysis flag)1755 	void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
1756 	{
1757 		bool marker = false;
1758 		for(unsigned int i = 0; i < instruction.size(); i++)
1759 		{
1760 			if(!marker)
1761 			{
1762 				if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel)
1763 				{
1764 					marker = true;
1765 				}
1766 			}
1767 			else
1768 			{
1769 				if(instruction[i]->opcode == OPCODE_RET)
1770 				{
1771 					break;
1772 				}
1773 				else if(instruction[i]->isCall())
1774 				{
1775 					markFunctionAnalysis(instruction[i]->dst.label, flag);
1776 				}
1777 
1778 				instruction[i]->analysis |= flag;
1779 			}
1780 		}
1781 	}
1782 
analyzeSamplers()1783 	void Shader::analyzeSamplers()
1784 	{
1785 		for(unsigned int i = 0; i < instruction.size(); i++)
1786 		{
1787 			switch(instruction[i]->opcode)
1788 			{
1789 			case OPCODE_TEX:
1790 			case OPCODE_TEXBEM:
1791 			case OPCODE_TEXBEML:
1792 			case OPCODE_TEXREG2AR:
1793 			case OPCODE_TEXREG2GB:
1794 			case OPCODE_TEXM3X2TEX:
1795 			case OPCODE_TEXM3X3TEX:
1796 			case OPCODE_TEXM3X3SPEC:
1797 			case OPCODE_TEXM3X3VSPEC:
1798 			case OPCODE_TEXREG2RGB:
1799 			case OPCODE_TEXDP3TEX:
1800 			case OPCODE_TEXM3X2DEPTH:
1801 			case OPCODE_TEXLDD:
1802 			case OPCODE_TEXLDL:
1803 			case OPCODE_TEXOFFSET:
1804 			case OPCODE_TEXLDLOFFSET:
1805 			case OPCODE_TEXELFETCH:
1806 			case OPCODE_TEXELFETCHOFFSET:
1807 			case OPCODE_TEXGRAD:
1808 			case OPCODE_TEXGRADOFFSET:
1809 				{
1810 					Parameter &dst = instruction[i]->dst;
1811 					Parameter &src1 = instruction[i]->src[1];
1812 
1813 					if(majorVersion >= 2)
1814 					{
1815 						usedSamplers |= 1 << src1.index;
1816 					}
1817 					else
1818 					{
1819 						usedSamplers |= 1 << dst.index;
1820 					}
1821 				}
1822 				break;
1823 			default:
1824 				break;
1825 			}
1826 		}
1827 	}
1828 
1829 	// Assigns a unique index to each call instruction, on a per label basis.
1830 	// This is used to know what basic block to return to.
analyzeCallSites()1831 	void Shader::analyzeCallSites()
1832 	{
1833 		int callSiteIndex[2048] = {0};
1834 
1835 		for(unsigned int i = 0; i < instruction.size(); i++)
1836 		{
1837 			if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ)
1838 			{
1839 				int label = instruction[i]->dst.label;
1840 
1841 				instruction[i]->dst.callSite = callSiteIndex[label]++;
1842 			}
1843 		}
1844 	}
1845 
analyzeDynamicIndexing()1846 	void Shader::analyzeDynamicIndexing()
1847 	{
1848 		dynamicallyIndexedTemporaries = false;
1849 		dynamicallyIndexedInput = false;
1850 		dynamicallyIndexedOutput = false;
1851 
1852 		for(unsigned int i = 0; i < instruction.size(); i++)
1853 		{
1854 			if(instruction[i]->dst.rel.type == PARAMETER_ADDR ||
1855 			   instruction[i]->dst.rel.type == PARAMETER_LOOP ||
1856 			   instruction[i]->dst.rel.type == PARAMETER_TEMP ||
1857 			   instruction[i]->dst.rel.type == PARAMETER_CONST)
1858 			{
1859 				switch(instruction[i]->dst.type)
1860 				{
1861 				case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1862 				case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1863 				case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1864 				default: break;
1865 				}
1866 			}
1867 
1868 			for(int j = 0; j < 3; j++)
1869 			{
1870 				if(instruction[i]->src[j].rel.type == PARAMETER_ADDR ||
1871 				   instruction[i]->src[j].rel.type == PARAMETER_LOOP ||
1872 				   instruction[i]->src[j].rel.type == PARAMETER_TEMP ||
1873 				   instruction[i]->src[j].rel.type == PARAMETER_CONST)
1874 				{
1875 					switch(instruction[i]->src[j].type)
1876 					{
1877 					case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1878 					case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1879 					case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1880 					default: break;
1881 					}
1882 				}
1883 			}
1884 		}
1885 	}
1886 }
1887