1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Shader.hpp"
16 
17 #include "VertexShader.hpp"
18 #include "PixelShader.hpp"
19 #include "Common/Math.hpp"
20 #include "Common/Debug.hpp"
21 
22 #include <algorithm>
23 #include <set>
24 #include <fstream>
25 #include <functional>
26 #include <sstream>
27 #include <stdarg.h>
28 #include <unordered_map>
29 #include <unordered_set>
30 
31 namespace sw
32 {
33 	volatile int Shader::serialCounter = 1;   // Storage for the static member; starts at 1. NOTE(review): 'volatile' does not make updates atomic - confirm how this counter is shared.
34 
OPCODE_DP(int i)35 	Shader::Opcode Shader::OPCODE_DP(int i)
36 	{
37 		switch(i)
38 		{
39 		default: ASSERT(false);
40 		case 1: return OPCODE_DP1;
41 		case 2: return OPCODE_DP2;
42 		case 3: return OPCODE_DP3;
43 		case 4: return OPCODE_DP4;
44 		}
45 	}
46 
OPCODE_LEN(int i)47 	Shader::Opcode Shader::OPCODE_LEN(int i)
48 	{
49 		switch(i)
50 		{
51 		default: ASSERT(false);
52 		case 1: return OPCODE_ABS;
53 		case 2: return OPCODE_LEN2;
54 		case 3: return OPCODE_LEN3;
55 		case 4: return OPCODE_LEN4;
56 		}
57 	}
58 
OPCODE_DIST(int i)59 	Shader::Opcode Shader::OPCODE_DIST(int i)
60 	{
61 		switch(i)
62 		{
63 		default: ASSERT(false);
64 		case 1: return OPCODE_DIST1;
65 		case 2: return OPCODE_DIST2;
66 		case 3: return OPCODE_DIST3;
67 		case 4: return OPCODE_DIST4;
68 		}
69 	}
70 
OPCODE_NRM(int i)71 	Shader::Opcode Shader::OPCODE_NRM(int i)
72 	{
73 		switch(i)
74 		{
75 		default: ASSERT(false);
76 		case 1: return OPCODE_SGN;
77 		case 2: return OPCODE_NRM2;
78 		case 3: return OPCODE_NRM3;
79 		case 4: return OPCODE_NRM4;
80 		}
81 	}
82 
OPCODE_FORWARD(int i)83 	Shader::Opcode Shader::OPCODE_FORWARD(int i)
84 	{
85 		switch(i)
86 		{
87 		default: ASSERT(false);
88 		case 1: return OPCODE_FORWARD1;
89 		case 2: return OPCODE_FORWARD2;
90 		case 3: return OPCODE_FORWARD3;
91 		case 4: return OPCODE_FORWARD4;
92 		}
93 	}
94 
OPCODE_REFLECT(int i)95 	Shader::Opcode Shader::OPCODE_REFLECT(int i)
96 	{
97 		switch(i)
98 		{
99 		default: ASSERT(false);
100 		case 1: return OPCODE_REFLECT1;
101 		case 2: return OPCODE_REFLECT2;
102 		case 3: return OPCODE_REFLECT3;
103 		case 4: return OPCODE_REFLECT4;
104 		}
105 	}
106 
OPCODE_REFRACT(int i)107 	Shader::Opcode Shader::OPCODE_REFRACT(int i)
108 	{
109 		switch(i)
110 		{
111 		default: ASSERT(false);
112 		case 1: return OPCODE_REFRACT1;
113 		case 2: return OPCODE_REFRACT2;
114 		case 3: return OPCODE_REFRACT3;
115 		case 4: return OPCODE_REFRACT4;
116 		}
117 	}
118 
Instruction(Opcode opcode)119 	Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
120 	{
121 		control = CONTROL_RESERVED0;
122 
123 		predicate = false;
124 		predicateNot = false;
125 		predicateSwizzle = 0xE4;
126 
127 		coissue = false;
128 		samplerType = SAMPLER_UNKNOWN;
129 		usage = USAGE_POSITION;
130 		usageIndex = 0;
131 	}
132 
Instruction(const unsigned long * token,int size,unsigned char majorVersion)133 	Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
134 	{
135 		parseOperationToken(*token++, majorVersion);
136 
137 		samplerType = SAMPLER_UNKNOWN;
138 		usage = USAGE_POSITION;
139 		usageIndex = 0;
140 
141 		if(opcode == OPCODE_IF ||
142 		   opcode == OPCODE_IFC ||
143 		   opcode == OPCODE_LOOP ||
144 		   opcode == OPCODE_REP ||
145 		   opcode == OPCODE_BREAKC ||
146 		   opcode == OPCODE_BREAKP)   // No destination operand
147 		{
148 			if(size > 0) parseSourceToken(0, token++, majorVersion);
149 			if(size > 1) parseSourceToken(1, token++, majorVersion);
150 			if(size > 2) parseSourceToken(2, token++, majorVersion);
151 			if(size > 3) ASSERT(false);
152 		}
153 		else if(opcode == OPCODE_DCL)
154 		{
155 			parseDeclarationToken(*token++);
156 			parseDestinationToken(token++, majorVersion);
157 		}
158 		else
159 		{
160 			if(size > 0)
161 			{
162 				parseDestinationToken(token, majorVersion);
163 
164 				if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
165 				{
166 					token++;
167 					size--;
168 				}
169 
170 				token++;
171 				size--;
172 			}
173 
174 			if(predicate)
175 			{
176 				ASSERT(size != 0);
177 
178 				predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
179 				predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
180 
181 				token++;
182 				size--;
183 			}
184 
185 			for(int i = 0; size > 0; i++)
186 			{
187 				parseSourceToken(i, token, majorVersion);
188 
189 				token++;
190 				size--;
191 
192 				if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
193 				{
194 					token++;
195 					size--;
196 				}
197 			}
198 		}
199 	}
200 
~Instruction()201 	Shader::Instruction::~Instruction()
202 	{
203 	}
204 
string(ShaderType shaderType,unsigned short version) const205 	std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
206 	{
207 		std::string instructionString;
208 
209 		if(opcode != OPCODE_DCL)
210 		{
211 			instructionString += coissue ? "+ " : "";
212 
213 			if(predicate)
214 			{
215 				instructionString += predicateNot ? "(!p0" : "(p0";
216 				instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
217 				instructionString += ") ";
218 			}
219 
220 			instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
221 
222 			if(dst.type != PARAMETER_VOID)
223 			{
224 				instructionString += " " + dst.string(shaderType, version) +
225 				                           dst.relativeString() +
226 				                           dst.maskString();
227 			}
228 
229 			for(int i = 0; i < 4; i++)
230 			{
231 				if(src[i].type != PARAMETER_VOID)
232 				{
233 					instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
234 					instructionString += src[i].preModifierString() +
235 										 src[i].string(shaderType, version) +
236 										 src[i].relativeString() +
237 										 src[i].postModifierString() +
238 										 src[i].swizzleString();
239 				}
240 			}
241 		}
242 		else   // DCL
243 		{
244 			instructionString += "dcl";
245 
246 			if(dst.type == PARAMETER_SAMPLER)
247 			{
248 				switch(samplerType)
249 				{
250 				case SAMPLER_UNKNOWN: instructionString += " ";        break;
251 				case SAMPLER_1D:      instructionString += "_1d ";     break;
252 				case SAMPLER_2D:      instructionString += "_2d ";     break;
253 				case SAMPLER_CUBE:    instructionString += "_cube ";   break;
254 				case SAMPLER_VOLUME:  instructionString += "_volume "; break;
255 				default:
256 					ASSERT(false);
257 				}
258 
259 				instructionString += dst.string(shaderType, version);
260 			}
261 			else if(dst.type == PARAMETER_INPUT ||
262 				    dst.type == PARAMETER_OUTPUT ||
263 				    dst.type == PARAMETER_TEXTURE)
264 			{
265 				if(version >= 0x0300)
266 				{
267 					switch(usage)
268 					{
269 					case USAGE_POSITION:     instructionString += "_position";     break;
270 					case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
271 					case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
272 					case USAGE_NORMAL:       instructionString += "_normal";       break;
273 					case USAGE_PSIZE:        instructionString += "_psize";        break;
274 					case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
275 					case USAGE_TANGENT:      instructionString += "_tangent";      break;
276 					case USAGE_BINORMAL:     instructionString += "_binormal";     break;
277 					case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
278 					case USAGE_POSITIONT:    instructionString += "_positiont";    break;
279 					case USAGE_COLOR:        instructionString += "_color";        break;
280 					case USAGE_FOG:          instructionString += "_fog";          break;
281 					case USAGE_DEPTH:        instructionString += "_depth";        break;
282 					case USAGE_SAMPLE:       instructionString += "_sample";       break;
283 					default:
284 						ASSERT(false);
285 					}
286 
287 					if(usageIndex > 0)
288 					{
289 						std::ostringstream buffer;
290 
291 						buffer << (int)usageIndex;
292 
293 						instructionString += buffer.str();
294 					}
295 				}
296 				else ASSERT(dst.type != PARAMETER_OUTPUT);
297 
298 				instructionString += " ";
299 
300 				instructionString += dst.string(shaderType, version);
301 				instructionString += dst.maskString();
302 			}
303 			else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
304 			{
305 				instructionString += " ";
306 
307 				instructionString += dst.string(shaderType, version);
308 			}
309 			else ASSERT(false);
310 		}
311 
312 		return instructionString;
313 	}
314 
modifierString() const315 	std::string Shader::DestinationParameter::modifierString() const
316 	{
317 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
318 		{
319 			return "";
320 		}
321 
322 		std::string modifierString;
323 
324 		if(saturate)
325 		{
326 			modifierString += "_sat";
327 		}
328 
329 		if(partialPrecision)
330 		{
331 			modifierString += "_pp";
332 		}
333 
334 		if(centroid)
335 		{
336 			modifierString += "_centroid";
337 		}
338 
339 		return modifierString;
340 	}
341 
shiftString() const342 	std::string Shader::DestinationParameter::shiftString() const
343 	{
344 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
345 		{
346 			return "";
347 		}
348 
349 		switch(shift)
350 		{
351 		case 0:		return "";
352 		case 1:		return "_x2";
353 		case 2:		return "_x4";
354 		case 3:		return "_x8";
355 		case -1:	return "_d2";
356 		case -2:	return "_d4";
357 		case -3:	return "_d8";
358 		default:
359 			return "";
360 		//	ASSERT(false);   // FIXME
361 		}
362 	}
363 
maskString() const364 	std::string Shader::DestinationParameter::maskString() const
365 	{
366 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
367 		{
368 			return "";
369 		}
370 
371 		switch(mask)
372 		{
373 		case 0x0:	return "";
374 		case 0x1:	return ".x";
375 		case 0x2:	return ".y";
376 		case 0x3:	return ".xy";
377 		case 0x4:	return ".z";
378 		case 0x5:	return ".xz";
379 		case 0x6:	return ".yz";
380 		case 0x7:	return ".xyz";
381 		case 0x8:	return ".w";
382 		case 0x9:	return ".xw";
383 		case 0xA:	return ".yw";
384 		case 0xB:	return ".xyw";
385 		case 0xC:	return ".zw";
386 		case 0xD:	return ".xzw";
387 		case 0xE:	return ".yzw";
388 		case 0xF:	return "";
389 		default:
390 			ASSERT(false);
391 		}
392 
393 		return "";
394 	}
395 
preModifierString() const396 	std::string Shader::SourceParameter::preModifierString() const
397 	{
398 		if(type == PARAMETER_VOID)
399 		{
400 			return "";
401 		}
402 
403 		switch(modifier)
404 		{
405 		case MODIFIER_NONE:			return "";
406 		case MODIFIER_NEGATE:		return "-";
407 		case MODIFIER_BIAS:			return "";
408 		case MODIFIER_BIAS_NEGATE:	return "-";
409 		case MODIFIER_SIGN:			return "";
410 		case MODIFIER_SIGN_NEGATE:	return "-";
411 		case MODIFIER_COMPLEMENT:	return "1-";
412 		case MODIFIER_X2:			return "";
413 		case MODIFIER_X2_NEGATE:	return "-";
414 		case MODIFIER_DZ:			return "";
415 		case MODIFIER_DW:			return "";
416 		case MODIFIER_ABS:			return "";
417 		case MODIFIER_ABS_NEGATE:	return "-";
418 		case MODIFIER_NOT:			return "!";
419 		default:
420 			ASSERT(false);
421 		}
422 
423 		return "";
424 	}
425 
relativeString() const426 	std::string Shader::Parameter::relativeString() const
427 	{
428 		if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
429 		{
430 			if(rel.type == PARAMETER_VOID)
431 			{
432 				return "";
433 			}
434 			else if(rel.type == PARAMETER_ADDR)
435 			{
436 				switch(rel.swizzle & 0x03)
437 				{
438 				case 0: return "[a0.x]";
439 				case 1: return "[a0.y]";
440 				case 2: return "[a0.z]";
441 				case 3: return "[a0.w]";
442 				}
443 			}
444 			else if(rel.type == PARAMETER_TEMP)
445 			{
446 				std::ostringstream buffer;
447 				buffer << rel.index;
448 
449 				switch(rel.swizzle & 0x03)
450 				{
451 				case 0: return "[r" + buffer.str() + ".x]";
452 				case 1: return "[r" + buffer.str() + ".y]";
453 				case 2: return "[r" + buffer.str() + ".z]";
454 				case 3: return "[r" + buffer.str() + ".w]";
455 				}
456 			}
457 			else if(rel.type == PARAMETER_LOOP)
458 			{
459 				return "[aL]";
460 			}
461 			else if(rel.type == PARAMETER_CONST)
462 			{
463 				std::ostringstream buffer;
464 				buffer << rel.index;
465 
466 				switch(rel.swizzle & 0x03)
467 				{
468 				case 0: return "[c" + buffer.str() + ".x]";
469 				case 1: return "[c" + buffer.str() + ".y]";
470 				case 2: return "[c" + buffer.str() + ".z]";
471 				case 3: return "[c" + buffer.str() + ".w]";
472 				}
473 			}
474 			else ASSERT(false);
475 		}
476 
477 		return "";
478 	}
479 
postModifierString() const480 	std::string Shader::SourceParameter::postModifierString() const
481 	{
482 		if(type == PARAMETER_VOID)
483 		{
484 			return "";
485 		}
486 
487 		switch(modifier)
488 		{
489 		case MODIFIER_NONE:			return "";
490 		case MODIFIER_NEGATE:		return "";
491 		case MODIFIER_BIAS:			return "_bias";
492 		case MODIFIER_BIAS_NEGATE:	return "_bias";
493 		case MODIFIER_SIGN:			return "_bx2";
494 		case MODIFIER_SIGN_NEGATE:	return "_bx2";
495 		case MODIFIER_COMPLEMENT:	return "";
496 		case MODIFIER_X2:			return "_x2";
497 		case MODIFIER_X2_NEGATE:	return "_x2";
498 		case MODIFIER_DZ:			return "_dz";
499 		case MODIFIER_DW:			return "_dw";
500 		case MODIFIER_ABS:			return "_abs";
501 		case MODIFIER_ABS_NEGATE:	return "_abs";
502 		case MODIFIER_NOT:			return "";
503 		default:
504 			ASSERT(false);
505 		}
506 
507 		return "";
508 	}
509 
string(ShaderType shaderType,unsigned short version) const510 	std::string Shader::SourceParameter::string(ShaderType shaderType, unsigned short version) const
511 	{
512 		if(type == PARAMETER_CONST && bufferIndex >= 0)
513 		{
514 			std::ostringstream buffer;
515 			buffer << bufferIndex;
516 
517 			std::ostringstream offset;
518 			offset << index;
519 
520 			return "cb" + buffer.str() + "[" + offset.str() + "]";
521 		}
522 		else
523 		{
524 			return Parameter::string(shaderType, version);
525 		}
526 	}
527 
swizzleString() const528 	std::string Shader::SourceParameter::swizzleString() const
529 	{
530 		return Instruction::swizzleString(type, swizzle);
531 	}
532 
parseOperationToken(unsigned long token,unsigned char majorVersion)533 	void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
534 	{
535 		if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
536 		{
537 			opcode = (Opcode)token;
538 
539 			control = CONTROL_RESERVED0;
540 			predicate = false;
541 			coissue = false;
542 		}
543 		else
544 		{
545 			opcode = (Opcode)(token & 0x0000FFFF);
546 			control = (Control)((token & 0x00FF0000) >> 16);
547 
548 			int size = (token & 0x0F000000) >> 24;
549 
550 			predicate = (token & 0x10000000) != 0x00000000;
551 			coissue = (token & 0x40000000) != 0x00000000;
552 
553 			if(majorVersion < 2)
554 			{
555 				if(size != 0)
556 				{
557 					ASSERT(false);   // Reserved
558 				}
559 			}
560 
561 			if(majorVersion < 2)
562 			{
563 				if(predicate)
564 				{
565 					ASSERT(false);
566 				}
567 			}
568 
569 			if((token & 0x20000000) != 0x00000000)
570 			{
571 				ASSERT(false);   // Reserved
572 			}
573 
574 			if(majorVersion >= 2)
575 			{
576 				if(coissue)
577 				{
578 					ASSERT(false);   // Reserved
579 				}
580 			}
581 
582 			if((token & 0x80000000) != 0x00000000)
583 			{
584 				ASSERT(false);
585 			}
586 		}
587 	}
588 
parseDeclarationToken(unsigned long token)589 	void Shader::Instruction::parseDeclarationToken(unsigned long token)
590 	{
591 		samplerType = (SamplerType)((token & 0x78000000) >> 27);
592 		usage = (Usage)(token & 0x0000001F);
593 		usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
594 	}
595 
parseDestinationToken(const unsigned long * token,unsigned char majorVersion)596 	void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
597 	{
598 		dst.index = (unsigned short)(token[0] & 0x000007FF);
599 		dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
600 
601 		// TODO: Check type and index range
602 
603 		bool relative = (token[0] & 0x00002000) != 0x00000000;
604 		dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
605 		dst.rel.swizzle = 0x00;
606 		dst.rel.scale = 1;
607 
608 		if(relative && majorVersion >= 3)
609 		{
610 			dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
611 			dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
612 		}
613 		else if(relative) ASSERT(false);   // Reserved
614 
615 		if((token[0] & 0x0000C000) != 0x00000000)
616 		{
617 			ASSERT(false);   // Reserved
618 		}
619 
620 		dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
621 		dst.saturate = (token[0] & 0x00100000) != 0;
622 		dst.partialPrecision = (token[0] & 0x00200000) != 0;
623 		dst.centroid = (token[0] & 0x00400000) != 0;
624 		dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
625 
626 		if(majorVersion >= 2)
627 		{
628 			if(dst.shift)
629 			{
630 				ASSERT(false);   // Reserved
631 			}
632 		}
633 
634 		if((token[0] & 0x80000000) != 0x80000000)
635 		{
636 			ASSERT(false);
637 		}
638 	}
639 
parseSourceToken(int i,const unsigned long * token,unsigned char majorVersion)640 	void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
641 	{
642 		// Defaults
643 		src[i].index = 0;
644 		src[i].type = PARAMETER_VOID;
645 		src[i].modifier = MODIFIER_NONE;
646 		src[i].swizzle = 0xE4;
647 		src[i].rel.type = PARAMETER_VOID;
648 		src[i].rel.swizzle = 0x00;
649 		src[i].rel.scale = 1;
650 
651 		switch(opcode)
652 		{
653 		case OPCODE_DEF:
654 			src[0].type = PARAMETER_FLOAT4LITERAL;
655 			src[0].value[i] = *(float*)token;
656 			break;
657 		case OPCODE_DEFB:
658 			src[0].type = PARAMETER_BOOL1LITERAL;
659 			src[0].boolean[0] = *(int*)token;
660 			break;
661 		case OPCODE_DEFI:
662 			src[0].type = PARAMETER_INT4LITERAL;
663 			src[0].integer[i] = *(int*)token;
664 			break;
665 		default:
666 			src[i].index = (unsigned short)(token[0] & 0x000007FF);
667 			src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
668 
669 			// FIXME: Check type and index range
670 
671 			bool relative = (token[0] & 0x00002000) != 0x00000000;
672 			src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
673 
674 			if((token[0] & 0x0000C000) != 0x00000000)
675 			{
676 				if(opcode != OPCODE_DEF &&
677 				   opcode != OPCODE_DEFI &&
678 				   opcode != OPCODE_DEFB)
679 				{
680 					ASSERT(false);
681 				}
682 			}
683 
684 			src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
685 			src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
686 
687 			if((token[0] & 0x80000000) != 0x80000000)
688 			{
689 				if(opcode != OPCODE_DEF &&
690 				   opcode != OPCODE_DEFI &&
691 				   opcode != OPCODE_DEFB)
692 				{
693 					ASSERT(false);
694 				}
695 			}
696 
697 			if(relative && majorVersion >= 2)
698 			{
699 				src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
700 				src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
701 			}
702 		}
703 	}
704 
swizzleString(ParameterType type,unsigned char swizzle)705 	std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
706 	{
707 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
708 		{
709 			return "";
710 		}
711 
712 		int x = (swizzle & 0x03) >> 0;
713 		int y = (swizzle & 0x0C) >> 2;
714 		int z = (swizzle & 0x30) >> 4;
715 		int w = (swizzle & 0xC0) >> 6;
716 
717 		std::string swizzleString = ".";
718 
719 		switch(x)
720 		{
721 		case 0: swizzleString += "x"; break;
722 		case 1: swizzleString += "y"; break;
723 		case 2: swizzleString += "z"; break;
724 		case 3: swizzleString += "w"; break;
725 		}
726 
727 		if(!(x == y && y == z && z == w))
728 		{
729 			switch(y)
730 			{
731 			case 0: swizzleString += "x"; break;
732 			case 1: swizzleString += "y"; break;
733 			case 2: swizzleString += "z"; break;
734 			case 3: swizzleString += "w"; break;
735 			}
736 
737 			if(!(y == z && z == w))
738 			{
739 				switch(z)
740 				{
741 				case 0: swizzleString += "x"; break;
742 				case 1: swizzleString += "y"; break;
743 				case 2: swizzleString += "z"; break;
744 				case 3: swizzleString += "w"; break;
745 				}
746 
747 				if(!(z == w))
748 				{
749 					switch(w)
750 					{
751 					case 0: swizzleString += "x"; break;
752 					case 1: swizzleString += "y"; break;
753 					case 2: swizzleString += "z"; break;
754 					case 3: swizzleString += "w"; break;
755 					}
756 				}
757 			}
758 		}
759 
760 		return swizzleString;
761 	}
762 
operationString(unsigned short version) const763 	std::string Shader::Instruction::operationString(unsigned short version) const
764 	{
765 		switch(opcode)
766 		{
767 		case OPCODE_NULL:            return "null";
768 		case OPCODE_NOP:             return "nop";
769 		case OPCODE_MOV:             return "mov";
770 		case OPCODE_ADD:             return "add";
771 		case OPCODE_IADD:            return "iadd";
772 		case OPCODE_SUB:             return "sub";
773 		case OPCODE_ISUB:            return "isub";
774 		case OPCODE_MAD:             return "mad";
775 		case OPCODE_IMAD:            return "imad";
776 		case OPCODE_MUL:             return "mul";
777 		case OPCODE_IMUL:            return "imul";
778 		case OPCODE_RCPX:            return "rcpx";
779 		case OPCODE_DIV:             return "div";
780 		case OPCODE_IDIV:            return "idiv";
781 		case OPCODE_UDIV:            return "udiv";
782 		case OPCODE_MOD:             return "mod";
783 		case OPCODE_IMOD:            return "imod";
784 		case OPCODE_UMOD:            return "umod";
785 		case OPCODE_SHL:             return "shl";
786 		case OPCODE_ISHR:            return "ishr";
787 		case OPCODE_USHR:            return "ushr";
788 		case OPCODE_RSQX:            return "rsqx";
789 		case OPCODE_SQRT:            return "sqrt";
790 		case OPCODE_RSQ:             return "rsq";
791 		case OPCODE_LEN2:            return "len2";
792 		case OPCODE_LEN3:            return "len3";
793 		case OPCODE_LEN4:            return "len4";
794 		case OPCODE_DIST1:           return "dist1";
795 		case OPCODE_DIST2:           return "dist2";
796 		case OPCODE_DIST3:           return "dist3";
797 		case OPCODE_DIST4:           return "dist4";
798 		case OPCODE_DP3:             return "dp3";
799 		case OPCODE_DP4:             return "dp4";
800 		case OPCODE_DET2:            return "det2";
801 		case OPCODE_DET3:            return "det3";
802 		case OPCODE_DET4:            return "det4";
803 		case OPCODE_MIN:             return "min";
804 		case OPCODE_IMIN:            return "imin";
805 		case OPCODE_UMIN:            return "umin";
806 		case OPCODE_MAX:             return "max";
807 		case OPCODE_IMAX:            return "imax";
808 		case OPCODE_UMAX:            return "umax";
809 		case OPCODE_SLT:             return "slt";
810 		case OPCODE_SGE:             return "sge";
811 		case OPCODE_EXP2X:           return "exp2x";
812 		case OPCODE_LOG2X:           return "log2x";
813 		case OPCODE_LIT:             return "lit";
814 		case OPCODE_ATT:             return "att";
815 		case OPCODE_LRP:             return "lrp";
816 		case OPCODE_STEP:            return "step";
817 		case OPCODE_SMOOTH:          return "smooth";
818 		case OPCODE_FLOATBITSTOINT:  return "floatBitsToInt";
819 		case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
820 		case OPCODE_INTBITSTOFLOAT:  return "intBitsToFloat";
821 		case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
822 		case OPCODE_PACKSNORM2x16:   return "packSnorm2x16";
823 		case OPCODE_PACKUNORM2x16:   return "packUnorm2x16";
824 		case OPCODE_PACKHALF2x16:    return "packHalf2x16";
825 		case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
826 		case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
827 		case OPCODE_UNPACKHALF2x16:  return "unpackHalf2x16";
828 		case OPCODE_FRC:             return "frc";
829 		case OPCODE_M4X4:            return "m4x4";
830 		case OPCODE_M4X3:            return "m4x3";
831 		case OPCODE_M3X4:            return "m3x4";
832 		case OPCODE_M3X3:            return "m3x3";
833 		case OPCODE_M3X2:            return "m3x2";
834 		case OPCODE_CALL:            return "call";
835 		case OPCODE_CALLNZ:          return "callnz";
836 		case OPCODE_LOOP:            return "loop";
837 		case OPCODE_RET:             return "ret";
838 		case OPCODE_ENDLOOP:         return "endloop";
839 		case OPCODE_LABEL:           return "label";
840 		case OPCODE_DCL:             return "dcl";
841 		case OPCODE_POWX:            return "powx";
842 		case OPCODE_CRS:             return "crs";
843 		case OPCODE_SGN:             return "sgn";
844 		case OPCODE_ISGN:            return "isgn";
845 		case OPCODE_ABS:             return "abs";
846 		case OPCODE_IABS:            return "iabs";
847 		case OPCODE_NRM2:            return "nrm2";
848 		case OPCODE_NRM3:            return "nrm3";
849 		case OPCODE_NRM4:            return "nrm4";
850 		case OPCODE_SINCOS:          return "sincos";
851 		case OPCODE_REP:             return "rep";
852 		case OPCODE_ENDREP:          return "endrep";
853 		case OPCODE_IF:              return "if";
854 		case OPCODE_IFC:             return "ifc";
855 		case OPCODE_ELSE:            return "else";
856 		case OPCODE_ENDIF:           return "endif";
857 		case OPCODE_BREAK:           return "break";
858 		case OPCODE_BREAKC:          return "breakc";
859 		case OPCODE_MOVA:            return "mova";
860 		case OPCODE_DEFB:            return "defb";
861 		case OPCODE_DEFI:            return "defi";
862 		case OPCODE_TEXCOORD:        return "texcoord";
863 		case OPCODE_TEXKILL:         return "texkill";
864 		case OPCODE_DISCARD:         return "discard";
865 		case OPCODE_TEX:
866 			if(version < 0x0104)     return "tex";
867 			else                     return "texld";
868 		case OPCODE_TEXBEM:          return "texbem";
869 		case OPCODE_TEXBEML:         return "texbeml";
870 		case OPCODE_TEXREG2AR:       return "texreg2ar";
871 		case OPCODE_TEXREG2GB:       return "texreg2gb";
872 		case OPCODE_TEXM3X2PAD:      return "texm3x2pad";
873 		case OPCODE_TEXM3X2TEX:      return "texm3x2tex";
874 		case OPCODE_TEXM3X3PAD:      return "texm3x3pad";
875 		case OPCODE_TEXM3X3TEX:      return "texm3x3tex";
876 		case OPCODE_RESERVED0:       return "reserved0";
877 		case OPCODE_TEXM3X3SPEC:     return "texm3x3spec";
878 		case OPCODE_TEXM3X3VSPEC:    return "texm3x3vspec";
879 		case OPCODE_EXPP:            return "expp";
880 		case OPCODE_LOGP:            return "logp";
881 		case OPCODE_CND:             return "cnd";
882 		case OPCODE_DEF:             return "def";
883 		case OPCODE_TEXREG2RGB:      return "texreg2rgb";
884 		case OPCODE_TEXDP3TEX:       return "texdp3tex";
885 		case OPCODE_TEXM3X2DEPTH:    return "texm3x2depth";
886 		case OPCODE_TEXDP3:          return "texdp3";
887 		case OPCODE_TEXM3X3:         return "texm3x3";
888 		case OPCODE_TEXDEPTH:        return "texdepth";
889 		case OPCODE_CMP0:            return "cmp0";
890 		case OPCODE_ICMP:            return "icmp";
891 		case OPCODE_UCMP:            return "ucmp";
892 		case OPCODE_SELECT:          return "select";
893 		case OPCODE_EXTRACT:         return "extract";
894 		case OPCODE_INSERT:          return "insert";
895 		case OPCODE_BEM:             return "bem";
896 		case OPCODE_DP2ADD:          return "dp2add";
897 		case OPCODE_DFDX:            return "dFdx";
898 		case OPCODE_DFDY:            return "dFdy";
899 		case OPCODE_FWIDTH:          return "fwidth";
900 		case OPCODE_TEXLDD:          return "texldd";
901 		case OPCODE_CMP:             return "cmp";
902 		case OPCODE_TEXLDL:          return "texldl";
903 		case OPCODE_TEXBIAS:         return "texbias";
904 		case OPCODE_TEXOFFSET:       return "texoffset";
905 		case OPCODE_TEXOFFSETBIAS:   return "texoffsetbias";
906 		case OPCODE_TEXLOD:          return "texlod";
907 		case OPCODE_TEXLODOFFSET:    return "texlodoffset";
908 		case OPCODE_TEXELFETCH:      return "texelfetch";
909 		case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
910 		case OPCODE_TEXGRAD:         return "texgrad";
911 		case OPCODE_TEXGRADOFFSET:   return "texgradoffset";
912 		case OPCODE_BREAKP:          return "breakp";
913 		case OPCODE_TEXSIZE:         return "texsize";
914 		case OPCODE_PHASE:           return "phase";
915 		case OPCODE_COMMENT:         return "comment";
916 		case OPCODE_END:             return "end";
917 		case OPCODE_PS_1_0:          return "ps_1_0";
918 		case OPCODE_PS_1_1:          return "ps_1_1";
919 		case OPCODE_PS_1_2:          return "ps_1_2";
920 		case OPCODE_PS_1_3:          return "ps_1_3";
921 		case OPCODE_PS_1_4:          return "ps_1_4";
922 		case OPCODE_PS_2_0:          return "ps_2_0";
923 		case OPCODE_PS_2_x:          return "ps_2_x";
924 		case OPCODE_PS_3_0:          return "ps_3_0";
925 		case OPCODE_VS_1_0:          return "vs_1_0";
926 		case OPCODE_VS_1_1:          return "vs_1_1";
927 		case OPCODE_VS_2_0:          return "vs_2_0";
928 		case OPCODE_VS_2_x:          return "vs_2_x";
929 		case OPCODE_VS_2_sw:         return "vs_2_sw";
930 		case OPCODE_VS_3_0:          return "vs_3_0";
931 		case OPCODE_VS_3_sw:         return "vs_3_sw";
932 		case OPCODE_WHILE:           return "while";
933 		case OPCODE_ENDWHILE:        return "endwhile";
934 		case OPCODE_COS:             return "cos";
935 		case OPCODE_SIN:             return "sin";
936 		case OPCODE_TAN:             return "tan";
937 		case OPCODE_ACOS:            return "acos";
938 		case OPCODE_ASIN:            return "asin";
939 		case OPCODE_ATAN:            return "atan";
940 		case OPCODE_ATAN2:           return "atan2";
941 		case OPCODE_COSH:            return "cosh";
942 		case OPCODE_SINH:            return "sinh";
943 		case OPCODE_TANH:            return "tanh";
944 		case OPCODE_ACOSH:           return "acosh";
945 		case OPCODE_ASINH:           return "asinh";
946 		case OPCODE_ATANH:           return "atanh";
947 		case OPCODE_DP1:             return "dp1";
948 		case OPCODE_DP2:             return "dp2";
949 		case OPCODE_TRUNC:           return "trunc";
950 		case OPCODE_FLOOR:           return "floor";
951 		case OPCODE_ROUND:           return "round";
952 		case OPCODE_ROUNDEVEN:       return "roundEven";
953 		case OPCODE_CEIL:            return "ceil";
954 		case OPCODE_EXP2:            return "exp2";
955 		case OPCODE_LOG2:            return "log2";
956 		case OPCODE_EXP:             return "exp";
957 		case OPCODE_LOG:             return "log";
958 		case OPCODE_POW:             return "pow";
959 		case OPCODE_F2B:             return "f2b";
960 		case OPCODE_B2F:             return "b2f";
961 		case OPCODE_F2I:             return "f2i";
962 		case OPCODE_I2F:             return "i2f";
963 		case OPCODE_F2U:             return "f2u";
964 		case OPCODE_U2F:             return "u2f";
965 		case OPCODE_B2I:             return "b2i";
966 		case OPCODE_I2B:             return "i2b";
967 		case OPCODE_ALL:             return "all";
968 		case OPCODE_ANY:             return "any";
969 		case OPCODE_NEG:             return "neg";
970 		case OPCODE_INEG:            return "ineg";
971 		case OPCODE_ISNAN:           return "isnan";
972 		case OPCODE_ISINF:           return "isinf";
973 		case OPCODE_NOT:             return "not";
974 		case OPCODE_OR:              return "or";
975 		case OPCODE_XOR:             return "xor";
976 		case OPCODE_AND:             return "and";
977 		case OPCODE_EQ:              return "eq";
978 		case OPCODE_NE:              return "neq";
979 		case OPCODE_FORWARD1:        return "forward1";
980 		case OPCODE_FORWARD2:        return "forward2";
981 		case OPCODE_FORWARD3:        return "forward3";
982 		case OPCODE_FORWARD4:        return "forward4";
983 		case OPCODE_REFLECT1:        return "reflect1";
984 		case OPCODE_REFLECT2:        return "reflect2";
985 		case OPCODE_REFLECT3:        return "reflect3";
986 		case OPCODE_REFLECT4:        return "reflect4";
987 		case OPCODE_REFRACT1:        return "refract1";
988 		case OPCODE_REFRACT2:        return "refract2";
989 		case OPCODE_REFRACT3:        return "refract3";
990 		case OPCODE_REFRACT4:        return "refract4";
991 		case OPCODE_LEAVE:           return "leave";
992 		case OPCODE_CONTINUE:        return "continue";
993 		case OPCODE_TEST:            return "test";
994 		case OPCODE_SWITCH:          return "switch";
995 		case OPCODE_ENDSWITCH:       return "endswitch";
996 		case OPCODE_SCALAR:          return "scalar";
997 		}
998 
999 		return "<unknown>";
1000 	}
1001 
controlString() const1002 	std::string Shader::Instruction::controlString() const
1003 	{
1004 		if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
1005 		{
1006 			if(project) return "p";
1007 
1008 			if(bias) return "b";
1009 
1010 			// FIXME: LOD
1011 		}
1012 
1013 		switch(control)
1014 		{
1015 		case 1: return "_gt";
1016 		case 2: return "_eq";
1017 		case 3: return "_ge";
1018 		case 4: return "_lt";
1019 		case 5: return "_ne";
1020 		case 6: return "_le";
1021 		default:
1022 			return "";
1023 		//	ASSERT(false);   // FIXME
1024 		}
1025 	}
1026 
string(ShaderType shaderType,unsigned short version) const1027 	std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
1028 	{
1029 		std::ostringstream buffer;
1030 
1031 		if(type == PARAMETER_FLOAT4LITERAL)
1032 		{
1033 			buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
1034 
1035 			return buffer.str();
1036 		}
1037 		else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
1038 		{
1039 			buffer << index;
1040 
1041 			return typeString(shaderType, version) + buffer.str();
1042 		}
1043 		else
1044 		{
1045 			return typeString(shaderType, version);
1046 		}
1047 	}
1048 
typeString(ShaderType shaderType,unsigned short version) const1049 	std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1050 	{
1051 		switch(type)
1052 		{
1053 		case PARAMETER_TEMP:			return "r";
1054 		case PARAMETER_INPUT:			return "v";
1055 		case PARAMETER_CONST:			return "c";
1056 		case PARAMETER_TEXTURE:
1057 	//	case PARAMETER_ADDR:
1058 			if(shaderType == SHADER_PIXEL)	return "t";
1059 			else							return "a0";
1060 		case PARAMETER_RASTOUT:
1061 			if(index == 0)              return "oPos";
1062 			else if(index == 1)         return "oFog";
1063 			else if(index == 2)         return "oPts";
1064 			else                        ASSERT(false);
1065 		case PARAMETER_ATTROUT:			return "oD";
1066 		case PARAMETER_TEXCRDOUT:
1067 	//	case PARAMETER_OUTPUT:			return "";
1068 			if(version < 0x0300)		return "oT";
1069 			else						return "o";
1070 		case PARAMETER_CONSTINT:		return "i";
1071 		case PARAMETER_COLOROUT:		return "oC";
1072 		case PARAMETER_DEPTHOUT:		return "oDepth";
1073 		case PARAMETER_SAMPLER:			return "s";
1074 	//	case PARAMETER_CONST2:			return "";
1075 	//	case PARAMETER_CONST3:			return "";
1076 	//	case PARAMETER_CONST4:			return "";
1077 		case PARAMETER_CONSTBOOL:		return "b";
1078 		case PARAMETER_LOOP:			return "aL";
1079 	//	case PARAMETER_TEMPFLOAT16:		return "";
1080 		case PARAMETER_MISCTYPE:
1081 			switch(index)
1082 			{
1083 			case VPosIndex:				return "vPos";
1084 			case VFaceIndex:			return "vFace";
1085 			case InstanceIDIndex:		return "iID";
1086 			case VertexIDIndex:			return "vID";
1087 			default: ASSERT(false);
1088 			}
1089 		case PARAMETER_LABEL:			return "l";
1090 		case PARAMETER_PREDICATE:		return "p0";
1091 		case PARAMETER_FLOAT4LITERAL:	return "";
1092 		case PARAMETER_BOOL1LITERAL:	return "";
1093 		case PARAMETER_INT4LITERAL:		return "";
1094 	//	case PARAMETER_VOID:			return "";
1095 		default:
1096 			ASSERT(false);
1097 		}
1098 
1099 		return "";
1100 	}
1101 
isBranch() const1102 	bool Shader::Instruction::isBranch() const
1103 	{
1104 		return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1105 	}
1106 
isCall() const1107 	bool Shader::Instruction::isCall() const
1108 	{
1109 		return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1110 	}
1111 
isBreak() const1112 	bool Shader::Instruction::isBreak() const
1113 	{
1114 		return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1115 	}
1116 
isLoop() const1117 	bool Shader::Instruction::isLoop() const
1118 	{
1119 		return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
1120 	}
1121 
isEndLoop() const1122 	bool Shader::Instruction::isEndLoop() const
1123 	{
1124 		return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
1125 	}
1126 
isPredicated() const1127 	bool Shader::Instruction::isPredicated() const
1128 	{
1129 		return predicate ||
1130 		       analysisBranch ||
1131 		       analysisBreak ||
1132 		       analysisContinue ||
1133 		       analysisLeave;
1134 	}
1135 
Shader()1136 	Shader::Shader() : serialID(serialCounter++)
1137 	{
1138 		usedSamplers = 0;
1139 	}
1140 
~Shader()1141 	Shader::~Shader()
1142 	{
1143 		for(auto &inst : instruction)
1144 		{
1145 			delete inst;
1146 			inst = 0;
1147 		}
1148 	}
1149 
parse(const unsigned long * token)1150 	void Shader::parse(const unsigned long *token)
1151 	{
1152 		minorVersion = (unsigned char)(token[0] & 0x000000FF);
1153 		majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1154 		shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1155 
1156 		int length = 0;
1157 
1158 		if(shaderType == SHADER_VERTEX)
1159 		{
1160 			length = VertexShader::validate(token);
1161 		}
1162 		else if(shaderType == SHADER_PIXEL)
1163 		{
1164 			length = PixelShader::validate(token);
1165 		}
1166 		else ASSERT(false);
1167 
1168 		ASSERT(length != 0);
1169 		instruction.resize(length);
1170 
1171 		for(int i = 0; i < length; i++)
1172 		{
1173 			while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
1174 			{
1175 				int length = (*token & 0x7FFF0000) >> 16;
1176 
1177 				token += length + 1;
1178 			}
1179 
1180 			int tokenCount = size(*token);
1181 
1182 			instruction[i] = new Instruction(token, tokenCount, majorVersion);
1183 
1184 			token += 1 + tokenCount;
1185 		}
1186 	}
1187 
size(unsigned long opcode) const1188 	int Shader::size(unsigned long opcode) const
1189 	{
1190 		return size(opcode, shaderModel);
1191 	}
1192 
size(unsigned long opcode,unsigned short shaderModel)1193 	int Shader::size(unsigned long opcode, unsigned short shaderModel)
1194 	{
1195 		if(shaderModel > 0x0300)
1196 		{
1197 			ASSERT(false);
1198 		}
1199 
1200 		static const signed char size[] =
1201 		{
1202 			0,   // NOP = 0
1203 			2,   // MOV
1204 			3,   // ADD
1205 			3,   // SUB
1206 			4,   // MAD
1207 			3,   // MUL
1208 			2,   // RCP
1209 			2,   // RSQ
1210 			3,   // DP3
1211 			3,   // DP4
1212 			3,   // MIN
1213 			3,   // MAX
1214 			3,   // SLT
1215 			3,   // SGE
1216 			2,   // EXP
1217 			2,   // LOG
1218 			2,   // LIT
1219 			3,   // DST
1220 			4,   // LRP
1221 			2,   // FRC
1222 			3,   // M4x4
1223 			3,   // M4x3
1224 			3,   // M3x4
1225 			3,   // M3x3
1226 			3,   // M3x2
1227 			1,   // CALL
1228 			2,   // CALLNZ
1229 			2,   // LOOP
1230 			0,   // RET
1231 			0,   // ENDLOOP
1232 			1,   // LABEL
1233 			2,   // DCL
1234 			3,   // POW
1235 			3,   // CRS
1236 			4,   // SGN
1237 			2,   // ABS
1238 			2,   // NRM
1239 			4,   // SINCOS
1240 			1,   // REP
1241 			0,   // ENDREP
1242 			1,   // IF
1243 			2,   // IFC
1244 			0,   // ELSE
1245 			0,   // ENDIF
1246 			0,   // BREAK
1247 			2,   // BREAKC
1248 			2,   // MOVA
1249 			2,   // DEFB
1250 			5,   // DEFI
1251 			-1,  // 49
1252 			-1,  // 50
1253 			-1,  // 51
1254 			-1,  // 52
1255 			-1,  // 53
1256 			-1,  // 54
1257 			-1,  // 55
1258 			-1,  // 56
1259 			-1,  // 57
1260 			-1,  // 58
1261 			-1,  // 59
1262 			-1,  // 60
1263 			-1,  // 61
1264 			-1,  // 62
1265 			-1,  // 63
1266 			1,   // TEXCOORD = 64
1267 			1,   // TEXKILL
1268 			1,   // TEX
1269 			2,   // TEXBEM
1270 			2,   // TEXBEML
1271 			2,   // TEXREG2AR
1272 			2,   // TEXREG2GB
1273 			2,   // TEXM3x2PAD
1274 			2,   // TEXM3x2TEX
1275 			2,   // TEXM3x3PAD
1276 			2,   // TEXM3x3TEX
1277 			-1,  // RESERVED0
1278 			3,   // TEXM3x3SPEC
1279 			2,   // TEXM3x3VSPEC
1280 			2,   // EXPP
1281 			2,   // LOGP
1282 			4,   // CND
1283 			5,   // DEF
1284 			2,   // TEXREG2RGB
1285 			2,   // TEXDP3TEX
1286 			2,   // TEXM3x2DEPTH
1287 			2,   // TEXDP3
1288 			2,   // TEXM3x3
1289 			1,   // TEXDEPTH
1290 			4,   // CMP
1291 			3,   // BEM
1292 			4,   // DP2ADD
1293 			2,   // DSX
1294 			2,   // DSY
1295 			5,   // TEXLDD
1296 			3,   // SETP
1297 			3,   // TEXLDL
1298 			2,   // BREAKP
1299 			-1,  // 97
1300 			-1,  // 98
1301 			-1,  // 99
1302 			-1,  // 100
1303 			-1,  // 101
1304 			-1,  // 102
1305 			-1,  // 103
1306 			-1,  // 104
1307 			-1,  // 105
1308 			-1,  // 106
1309 			-1,  // 107
1310 			-1,  // 108
1311 			-1,  // 109
1312 			-1,  // 110
1313 			-1,  // 111
1314 			-1,  // 112
1315 		};
1316 
1317 		int length = 0;
1318 
1319 		if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1320 		{
1321 			return (opcode & 0x7FFF0000) >> 16;
1322 		}
1323 
1324 		if(opcode != OPCODE_PS_1_0 &&
1325 		   opcode != OPCODE_PS_1_1 &&
1326 		   opcode != OPCODE_PS_1_2 &&
1327 		   opcode != OPCODE_PS_1_3 &&
1328 		   opcode != OPCODE_PS_1_4 &&
1329 		   opcode != OPCODE_PS_2_0 &&
1330 		   opcode != OPCODE_PS_2_x &&
1331 		   opcode != OPCODE_PS_3_0 &&
1332 		   opcode != OPCODE_VS_1_0 &&
1333 		   opcode != OPCODE_VS_1_1 &&
1334 		   opcode != OPCODE_VS_2_0 &&
1335 		   opcode != OPCODE_VS_2_x &&
1336 		   opcode != OPCODE_VS_2_sw &&
1337 		   opcode != OPCODE_VS_3_0 &&
1338 		   opcode != OPCODE_VS_3_sw &&
1339 		   opcode != OPCODE_PHASE &&
1340 		   opcode != OPCODE_END)
1341 		{
1342 			if(shaderModel >= 0x0200)
1343 			{
1344 				length = (opcode & 0x0F000000) >> 24;
1345 			}
1346 			else
1347 			{
1348 				length = size[opcode & 0x0000FFFF];
1349 			}
1350 		}
1351 
1352 		if(length < 0)
1353 		{
1354 			ASSERT(false);
1355 		}
1356 
1357 		if(shaderModel == 0x0104)
1358 		{
1359 			switch(opcode & 0x0000FFFF)
1360 			{
1361 			case OPCODE_TEX:
1362 				length += 1;
1363 				break;
1364 			case OPCODE_TEXCOORD:
1365 				length += 1;
1366 				break;
1367 			default:
1368 				break;
1369 			}
1370 		}
1371 
1372 		return length;
1373 	}
1374 
maskContainsComponent(int mask,int component)1375 	bool Shader::maskContainsComponent(int mask, int component)
1376 	{
1377 		return (mask & (1 << component)) != 0;
1378 	}
1379 
swizzleContainsComponent(int swizzle,int component)1380 	bool Shader::swizzleContainsComponent(int swizzle, int component)
1381 	{
1382 		if((swizzle & 0x03) >> 0 == component) return true;
1383 		if((swizzle & 0x0C) >> 2 == component) return true;
1384 		if((swizzle & 0x30) >> 4 == component) return true;
1385 		if((swizzle & 0xC0) >> 6 == component) return true;
1386 
1387 		return false;
1388 	}
1389 
swizzleContainsComponentMasked(int swizzle,int component,int mask)1390 	bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1391 	{
1392 		if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1393 		if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1394 		if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1395 		if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1396 
1397 		return false;
1398 	}
1399 
containsDynamicBranching() const1400 	bool Shader::containsDynamicBranching() const
1401 	{
1402 		return dynamicBranching;
1403 	}
1404 
containsBreakInstruction() const1405 	bool Shader::containsBreakInstruction() const
1406 	{
1407 		return containsBreak;
1408 	}
1409 
containsContinueInstruction() const1410 	bool Shader::containsContinueInstruction() const
1411 	{
1412 		return containsContinue;
1413 	}
1414 
containsLeaveInstruction() const1415 	bool Shader::containsLeaveInstruction() const
1416 	{
1417 		return containsLeave;
1418 	}
1419 
containsDefineInstruction() const1420 	bool Shader::containsDefineInstruction() const
1421 	{
1422 		return containsDefine;
1423 	}
1424 
usesSampler(int index) const1425 	bool Shader::usesSampler(int index) const
1426 	{
1427 		return (usedSamplers & (1 << index)) != 0;
1428 	}
1429 
getSerialID() const1430 	int Shader::getSerialID() const
1431 	{
1432 		return serialID;
1433 	}
1434 
getLength() const1435 	size_t Shader::getLength() const
1436 	{
1437 		return instruction.size();
1438 	}
1439 
getShaderType() const1440 	Shader::ShaderType Shader::getShaderType() const
1441 	{
1442 		return shaderType;
1443 	}
1444 
getShaderModel() const1445 	unsigned short Shader::getShaderModel() const
1446 	{
1447 		return shaderModel;
1448 	}
1449 
print(const char * fileName,...) const1450 	void Shader::print(const char *fileName, ...) const
1451 	{
1452 		char fullName[1024 + 1];
1453 
1454 		va_list vararg;
1455 		va_start(vararg, fileName);
1456 		vsnprintf(fullName, 1024, fileName, vararg);
1457 		va_end(vararg);
1458 
1459 		std::ofstream file(fullName, std::ofstream::out);
1460 
1461 		for(const auto &inst : instruction)
1462 		{
1463 			file << inst->string(shaderType, shaderModel) << std::endl;
1464 		}
1465 	}
1466 
printInstruction(int index,const char * fileName) const1467 	void Shader::printInstruction(int index, const char *fileName) const
1468 	{
1469 		std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1470 
1471 		file << instruction[index]->string(shaderType, shaderModel) << std::endl;
1472 	}
1473 
append(Instruction * instruction)1474 	void Shader::append(Instruction *instruction)
1475 	{
1476 		this->instruction.push_back(instruction);
1477 	}
1478 
declareSampler(int i)1479 	void Shader::declareSampler(int i)
1480 	{
1481 		if(i >= 0 && i < 16)
1482 		{
1483 			usedSamplers |= 1 << i;
1484 		}
1485 	}
1486 
getInstruction(size_t i) const1487 	const Shader::Instruction *Shader::getInstruction(size_t i) const
1488 	{
1489 		ASSERT(i < instruction.size());
1490 
1491 		return instruction[i];
1492 	}
1493 
optimize()1494 	void Shader::optimize()
1495 	{
1496 		optimizeLeave();
1497 		optimizeCall();
1498 		removeNull();
1499 	}
1500 
optimizeLeave()1501 	void Shader::optimizeLeave()
1502 	{
1503 		// A return (leave) right before the end of a function or the shader can be removed
1504 		for(unsigned int i = 0; i < instruction.size(); i++)
1505 		{
1506 			if(instruction[i]->opcode == OPCODE_LEAVE)
1507 			{
1508 				if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1509 				{
1510 					instruction[i]->opcode = OPCODE_NULL;
1511 				}
1512 			}
1513 		}
1514 	}
1515 
optimizeCall()1516 	void Shader::optimizeCall()
1517 	{
1518 		// Eliminate uncalled functions
1519 		std::set<int> calledFunctions;
1520 		bool rescan = true;
1521 
1522 		while(rescan)
1523 		{
1524 			calledFunctions.clear();
1525 			rescan = false;
1526 
1527 			for(const auto &inst : instruction)
1528 			{
1529 				if(inst->isCall())
1530 				{
1531 					calledFunctions.insert(inst->dst.label);
1532 				}
1533 			}
1534 
1535 			if(!calledFunctions.empty())
1536 			{
1537 				for(unsigned int i = 0; i < instruction.size(); i++)
1538 				{
1539 					if(instruction[i]->opcode == OPCODE_LABEL)
1540 					{
1541 						if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1542 						{
1543 							for( ; i < instruction.size(); i++)
1544 							{
1545 								Opcode oldOpcode = instruction[i]->opcode;
1546 								instruction[i]->opcode = OPCODE_NULL;
1547 
1548 								if(oldOpcode == OPCODE_RET)
1549 								{
1550 									rescan = true;
1551 									break;
1552 								}
1553 							}
1554 						}
1555 					}
1556 				}
1557 			}
1558 		}
1559 
1560 		// Optimize the entry call
1561 		if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1562 		{
1563 			if(calledFunctions.size() == 1)
1564 			{
1565 				instruction[0]->opcode = OPCODE_NULL;
1566 				instruction[1]->opcode = OPCODE_NULL;
1567 
1568 				for(size_t i = 2; i < instruction.size(); i++)
1569 				{
1570 					if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1571 					{
1572 						instruction[i]->opcode = OPCODE_NULL;
1573 					}
1574 				}
1575 			}
1576 		}
1577 	}
1578 
removeNull()1579 	void Shader::removeNull()
1580 	{
1581 		size_t size = 0;
1582 		for(size_t i = 0; i < instruction.size(); i++)
1583 		{
1584 			if(instruction[i]->opcode != OPCODE_NULL)
1585 			{
1586 				instruction[size] = instruction[i];
1587 				size++;
1588 			}
1589 			else
1590 			{
1591 				delete instruction[i];
1592 			}
1593 		}
1594 
1595 		instruction.resize(size);
1596 	}
1597 
analyzeDirtyConstants()1598 	void Shader::analyzeDirtyConstants()
1599 	{
1600 		dirtyConstantsF = 0;
1601 		dirtyConstantsI = 0;
1602 		dirtyConstantsB = 0;
1603 
1604 		for(const auto &inst : instruction)
1605 		{
1606 			switch(inst->opcode)
1607 			{
1608 			case OPCODE_DEF:
1609 				if(inst->dst.index + 1 > dirtyConstantsF)
1610 				{
1611 					dirtyConstantsF = inst->dst.index + 1;
1612 				}
1613 				break;
1614 			case OPCODE_DEFI:
1615 				if(inst->dst.index + 1 > dirtyConstantsI)
1616 				{
1617 					dirtyConstantsI = inst->dst.index + 1;
1618 				}
1619 				break;
1620 			case OPCODE_DEFB:
1621 				if(inst->dst.index + 1 > dirtyConstantsB)
1622 				{
1623 					dirtyConstantsB = inst->dst.index + 1;
1624 				}
1625 				break;
1626 			default:
1627 				break;
1628 			}
1629 		}
1630 	}
1631 
analyzeDynamicBranching()1632 	void Shader::analyzeDynamicBranching()
1633 	{
1634 		dynamicBranching = false;
1635 		containsLeave = false;
1636 		containsBreak = false;
1637 		containsContinue = false;
1638 		containsDefine = false;
1639 
1640 		// Determine global presence of branching instructions
1641 		for(const auto &inst : instruction)
1642 		{
1643 			switch(inst->opcode)
1644 			{
1645 			case OPCODE_CALLNZ:
1646 			case OPCODE_IF:
1647 			case OPCODE_IFC:
1648 			case OPCODE_BREAK:
1649 			case OPCODE_BREAKC:
1650 			case OPCODE_CMP:
1651 			case OPCODE_BREAKP:
1652 			case OPCODE_LEAVE:
1653 			case OPCODE_CONTINUE:
1654 				if(inst->src[0].type != PARAMETER_CONSTBOOL)
1655 				{
1656 					dynamicBranching = true;
1657 				}
1658 
1659 				if(inst->opcode == OPCODE_LEAVE)
1660 				{
1661 					containsLeave = true;
1662 				}
1663 
1664 				if(inst->isBreak())
1665 				{
1666 					containsBreak = true;
1667 				}
1668 
1669 				if(inst->opcode == OPCODE_CONTINUE)
1670 				{
1671 					containsContinue = true;
1672 				}
1673 			case OPCODE_DEF:
1674 			case OPCODE_DEFB:
1675 			case OPCODE_DEFI:
1676 				containsDefine = true;
1677 			default:
1678 				break;
1679 			}
1680 		}
1681 
1682 		// Conservatively determine which instructions are affected by dynamic branching
1683 		int branchDepth = 0;
1684 		int breakDepth = 0;
1685 		int continueDepth = 0;
1686 		bool leaveReturn = false;
1687 		unsigned int functionBegin = 0;
1688 
1689 		for(unsigned int i = 0; i < instruction.size(); i++)
1690 		{
1691 			// If statements and loops
1692 			if(instruction[i]->isBranch() || instruction[i]->isLoop())
1693 			{
1694 				branchDepth++;
1695 			}
1696 			else if(instruction[i]->opcode == OPCODE_ENDIF || instruction[i]->isEndLoop())
1697 			{
1698 				branchDepth--;
1699 			}
1700 
1701 			if(branchDepth > 0)
1702 			{
1703 				instruction[i]->analysisBranch = true;
1704 
1705 				if(instruction[i]->isCall())
1706 				{
1707 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1708 				}
1709 			}
1710 
1711 			// Break statemement
1712 			if(instruction[i]->isBreak())
1713 			{
1714 				breakDepth++;
1715 			}
1716 
1717 			if(breakDepth > 0)
1718 			{
1719 				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1720 				{
1721 					breakDepth++;
1722 				}
1723 				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1724 				{
1725 					breakDepth--;
1726 				}
1727 
1728 				instruction[i]->analysisBreak = true;
1729 
1730 				if(instruction[i]->isCall())
1731 				{
1732 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1733 				}
1734 			}
1735 
1736 			// Continue statement
1737 			if(instruction[i]->opcode == OPCODE_CONTINUE)
1738 			{
1739 				continueDepth++;
1740 			}
1741 
1742 			if(continueDepth > 0)
1743 			{
1744 				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1745 				{
1746 					continueDepth++;
1747 				}
1748 				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1749 				{
1750 					continueDepth--;
1751 				}
1752 
1753 				instruction[i]->analysisContinue = true;
1754 
1755 				if(instruction[i]->isCall())
1756 				{
1757 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1758 				}
1759 			}
1760 
1761 			// Return (leave) statement
1762 			if(instruction[i]->opcode == OPCODE_LEAVE)
1763 			{
1764 				leaveReturn = true;
1765 
1766 				// Mark loop body instructions prior to the return statement
1767 				for(unsigned int l = functionBegin; l < i; l++)
1768 				{
1769 					if(instruction[l]->isLoop())
1770 					{
1771 						for(unsigned int r = l + 1; r < i; r++)
1772 						{
1773 							instruction[r]->analysisLeave = true;
1774 						}
1775 
1776 						break;
1777 					}
1778 				}
1779 			}
1780 			else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
1781 			{
1782 				leaveReturn = false;
1783 			}
1784 			else if(instruction[i]->opcode == OPCODE_LABEL)
1785 			{
1786 				functionBegin = i;
1787 			}
1788 
1789 			if(leaveReturn)
1790 			{
1791 				instruction[i]->analysisLeave = true;
1792 
1793 				if(instruction[i]->isCall())
1794 				{
1795 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1796 				}
1797 			}
1798 		}
1799 	}
1800 
markFunctionAnalysis(unsigned int functionLabel,Analysis flag)1801 	void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
1802 	{
1803 		bool marker = false;
1804 		for(auto &inst : instruction)
1805 		{
1806 			if(!marker)
1807 			{
1808 				if(inst->opcode == OPCODE_LABEL && inst->dst.label == functionLabel)
1809 				{
1810 					marker = true;
1811 				}
1812 			}
1813 			else
1814 			{
1815 				if(inst->opcode == OPCODE_RET)
1816 				{
1817 					break;
1818 				}
1819 				else if(inst->isCall())
1820 				{
1821 					markFunctionAnalysis(inst->dst.label, flag);
1822 				}
1823 
1824 				inst->analysis |= flag;
1825 			}
1826 		}
1827 	}
1828 
analyzeSamplers()1829 	void Shader::analyzeSamplers()
1830 	{
1831 		for(const auto &inst : instruction)
1832 		{
1833 			switch(inst->opcode)
1834 			{
1835 			case OPCODE_TEX:
1836 			case OPCODE_TEXBEM:
1837 			case OPCODE_TEXBEML:
1838 			case OPCODE_TEXREG2AR:
1839 			case OPCODE_TEXREG2GB:
1840 			case OPCODE_TEXM3X2TEX:
1841 			case OPCODE_TEXM3X3TEX:
1842 			case OPCODE_TEXM3X3SPEC:
1843 			case OPCODE_TEXM3X3VSPEC:
1844 			case OPCODE_TEXREG2RGB:
1845 			case OPCODE_TEXDP3TEX:
1846 			case OPCODE_TEXM3X2DEPTH:
1847 			case OPCODE_TEXLDD:
1848 			case OPCODE_TEXLDL:
1849 			case OPCODE_TEXLOD:
1850 			case OPCODE_TEXOFFSET:
1851 			case OPCODE_TEXOFFSETBIAS:
1852 			case OPCODE_TEXLODOFFSET:
1853 			case OPCODE_TEXELFETCH:
1854 			case OPCODE_TEXELFETCHOFFSET:
1855 			case OPCODE_TEXGRAD:
1856 			case OPCODE_TEXGRADOFFSET:
1857 				{
1858 					Parameter &dst = inst->dst;
1859 					Parameter &src1 = inst->src[1];
1860 
1861 					if(majorVersion >= 2)
1862 					{
1863 						if(src1.type == PARAMETER_SAMPLER)
1864 						{
1865 							usedSamplers |= 1 << src1.index;
1866 						}
1867 					}
1868 					else
1869 					{
1870 						usedSamplers |= 1 << dst.index;
1871 					}
1872 				}
1873 				break;
1874 			default:
1875 				break;
1876 			}
1877 		}
1878 	}
1879 
1880 	// Assigns a unique index to each call instruction, on a per label basis.
1881 	// This is used to know what basic block to return to.
analyzeCallSites()1882 	void Shader::analyzeCallSites()
1883 	{
1884 		std::unordered_map<int, int> callSiteIndices;
1885 
1886 		for(auto &inst : instruction)
1887 		{
1888 			if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ)
1889 			{
1890 				inst->dst.callSite = callSiteIndices[inst->dst.label]++;
1891 			}
1892 		}
1893 	}
1894 
analyzeIndirectAddressing()1895 	void Shader::analyzeIndirectAddressing()
1896 	{
1897 		indirectAddressableTemporaries = false;
1898 		indirectAddressableInput = false;
1899 		indirectAddressableOutput = false;
1900 
1901 		for(const auto &inst : instruction)
1902 		{
1903 			if(inst->dst.rel.type != PARAMETER_VOID)
1904 			{
1905 				switch(inst->dst.type)
1906 				{
1907 				case PARAMETER_TEMP:   indirectAddressableTemporaries = true; break;
1908 				case PARAMETER_INPUT:  indirectAddressableInput = true;       break;
1909 				case PARAMETER_OUTPUT: indirectAddressableOutput = true;      break;
1910 				default: break;
1911 				}
1912 			}
1913 
1914 			for(int j = 0; j < 3; j++)
1915 			{
1916 				if(inst->src[j].rel.type != PARAMETER_VOID)
1917 				{
1918 					switch(inst->src[j].type)
1919 					{
1920 					case PARAMETER_TEMP:   indirectAddressableTemporaries = true; break;
1921 					case PARAMETER_INPUT:  indirectAddressableInput = true;       break;
1922 					case PARAMETER_OUTPUT: indirectAddressableOutput = true;      break;
1923 					default: break;
1924 					}
1925 				}
1926 			}
1927 		}
1928 	}
1929 
1930 	// analyzeLimits analyzes the whole shader program to determine the deepest
1931 	// nesting of control flow blocks and function calls. These calculations
1932 	// are stored into the limits member, and is used by the programs to
1933 	// allocate stack storage variables.
analyzeLimits()1934 	void Shader::analyzeLimits()
1935 	{
1936 		typedef unsigned int FunctionID;
1937 
1938 		// Identifier of the function with the main entry point.
1939 		constexpr FunctionID MAIN_ID = 0xF0000000;
1940 
1941 		// Invalid function identifier.
1942 		constexpr FunctionID INVALID_ID = ~0U;
1943 
1944 		// Limits on a single function.
1945 		struct FunctionLimits
1946 		{
1947 			uint32_t loops = 0; // maximum nested loop and reps.
1948 			uint32_t ifs = 0; // maximum nested if statements.
1949 			uint32_t stack = 0; // maximum call depth.
1950 		};
1951 
1952 		// Information about a single function in the shader.
1953 		struct FunctionInfo
1954 		{
1955 			FunctionLimits limits;
1956 			std::unordered_set<FunctionID> calls; // What this function calls.
1957 			bool reachable; // Is this function reachable?
1958 		};
1959 
1960 		std::unordered_map<FunctionID, FunctionInfo> functions;
1961 
1962 		uint32_t maxLabel = 0; // Highest label found
1963 
1964 		// Add a definition for the main entry point.
1965 		// This starts at the beginning of the instructions and does not have
1966 		// its own label.
1967 		functions[MAIN_ID] = FunctionInfo();
1968 		functions[MAIN_ID].reachable = true;
1969 
1970 		// Begin by doing a pass over the instructions to identify all the
1971 		// functions. These start with a label and end with a ret. Note that
1972 		// functions can have labels within them.
1973 		FunctionID currentFunc = MAIN_ID;
1974 		for(auto &inst : instruction)
1975 		{
1976 			switch (inst->opcode)
1977 			{
1978 				case OPCODE_LABEL:
1979 					if (currentFunc == INVALID_ID)
1980 					{
1981 						// Start of a function.
1982 						FunctionID id = inst->dst.label;
1983 						ASSERT(id != MAIN_ID); // If this fires, we're going to have to represent main with something else.
1984 						functions[id] = FunctionInfo();
1985 					}
1986 					break;
1987 				case OPCODE_RET:
1988 					currentFunc = INVALID_ID;
1989 					break;
1990 				default:
1991 					break;
1992 			}
1993 		}
1994 
1995 		// Limits for the currently analyzed function.
1996 		FunctionLimits currentLimits;
1997 
1998 		// Now loop over the instructions gathering the limits of each of the
1999 		// functions.
2000 		currentFunc = MAIN_ID;
2001 		for(size_t i = 0; i < instruction.size(); i++)
2002 		{
2003 			const auto& inst = instruction[i];
2004 			switch (inst->opcode)
2005 			{
2006 				case OPCODE_LABEL:
2007 				{
2008 					maxLabel = std::max(maxLabel, inst->dst.label);
2009 					if (currentFunc == INVALID_ID)
2010 					{
2011 						// Start of a function.
2012 						FunctionID id = inst->dst.label;
2013 						ASSERT(functions.find(id) != functions.end()); // Sanity check
2014 						currentFunc = id;
2015 					}
2016 					break;
2017 				}
2018 				case OPCODE_CALL:
2019 				case OPCODE_CALLNZ:
2020 				{
2021 					ASSERT(currentFunc != INVALID_ID);
2022 					FunctionID id = inst->dst.label;
2023 					ASSERT(functions.find(id) != functions.end());
2024 					functions[currentFunc].calls.emplace(id);
2025 					functions[id].reachable = true;
2026 					break;
2027 				}
2028 				case OPCODE_LOOP:
2029 				case OPCODE_REP:
2030 				case OPCODE_WHILE:
2031 				case OPCODE_SWITCH: // Not a mistake - switches share loopReps.
2032 				{
2033 					ASSERT(currentFunc != INVALID_ID);
2034 					++currentLimits.loops;
2035 					auto& func = functions[currentFunc];
2036 					func.limits.loops = std::max(func.limits.loops, currentLimits.loops);
2037 					break;
2038 				}
2039 				case OPCODE_ENDLOOP:
2040 				case OPCODE_ENDREP:
2041 				case OPCODE_ENDWHILE:
2042 				case OPCODE_ENDSWITCH:
2043 				{
2044 					ASSERT(currentLimits.loops > 0);
2045 					--currentLimits.loops;
2046 					break;
2047 				}
2048 				case OPCODE_IF:
2049 				case OPCODE_IFC:
2050 				{
2051 					ASSERT(currentFunc != INVALID_ID);
2052 					++currentLimits.ifs;
2053 					auto& func = functions[currentFunc];
2054 					func.limits.ifs = std::max(func.limits.ifs, currentLimits.ifs);
2055 					break;
2056 				}
2057 				case OPCODE_ENDIF:
2058 				{
2059 					ASSERT(currentLimits.ifs > 0);
2060 					currentLimits.ifs--;
2061 					break;
2062 				}
2063 				case OPCODE_RET:
2064 				{
2065 					// Must be in a function to return.
2066 					ASSERT(currentFunc != INVALID_ID);
2067 
2068 					// All stacks should be popped before returning.
2069 					ASSERT(currentLimits.ifs == 0);
2070 					ASSERT(currentLimits.loops == 0);
2071 
2072 					currentFunc = INVALID_ID;
2073 					currentLimits = FunctionLimits();
2074 					break;
2075 				}
2076 				default:
2077 					break;
2078 			}
2079 		}
2080 
2081 #if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
2082 		// Assert that every function is reachable (these should have been
2083 		// stripped in earlier stages). Unreachable functions may be code
2084 		// generated, but their own limits are not considered below, potentially
2085 		// causing OOB indexing in later stages.
2086 		// If we ever find cases where there are unreachable functions, we can
2087 		// replace this assert with NO-OPing or stripping out the dead
2088 		// functions.
2089 		for (const auto &it : functions) { ASSERT(it.second.reachable); }
2090 #endif
2091 
2092 		// We have now gathered all the information about each of the functions
2093 		// in the shader. Traverse these functions starting from the main
2094 		// function to calculate the maximum limits across the entire shader.
2095 
2096 		std::unordered_set<FunctionID> visited;
2097 		std::function<Limits(FunctionID)> traverse;
2098 		traverse = [&](FunctionID id) -> Limits
2099 		{
2100 			const auto& func = functions[id];
2101 			ASSERT(visited.count(id) == 0); // Sanity check: Recursive functions are not allowed.
2102 			visited.insert(id);
2103 			Limits limits;
2104 			limits.stack = 1;
2105 			for (auto callee : func.calls)
2106 			{
2107 				auto calleeLimits = traverse(callee);
2108 				limits.loops = std::max(limits.loops, calleeLimits.loops);
2109 				limits.ifs = std::max(limits.ifs, calleeLimits.ifs);
2110 				limits.stack = std::max(limits.stack, calleeLimits.stack + 1);
2111 			}
2112 			visited.erase(id);
2113 
2114 			limits.loops += func.limits.loops;
2115 			limits.ifs += func.limits.ifs;
2116 			return limits;
2117 		};
2118 
2119 		limits = traverse(MAIN_ID);
2120 		limits.maxLabel = maxLabel;
2121 	}
2122 }
2123