1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "OutputASM.h"
16 #include "Common/Math.hpp"
17 
18 #include "common/debug.h"
19 #include "InfoSink.h"
20 
21 #include "libGLESv2/Shader.h"
22 
23 #include <GLES2/gl2.h>
24 #include <GLES2/gl2ext.h>
25 #include <GLES3/gl3.h>
26 #include <GL/glcorearb.h>
27 #include <GL/glext.h>
28 
29 #include <stdlib.h>
30 
31 namespace
32 {
glVariableType(const TType & type)33 	GLenum glVariableType(const TType &type)
34 	{
35 		switch(type.getBasicType())
36 		{
37 		case EbtFloat:
38 			if(type.isScalar())
39 			{
40 				return GL_FLOAT;
41 			}
42 			else if(type.isVector())
43 			{
44 				switch(type.getNominalSize())
45 				{
46 				case 2: return GL_FLOAT_VEC2;
47 				case 3: return GL_FLOAT_VEC3;
48 				case 4: return GL_FLOAT_VEC4;
49 				default: UNREACHABLE(type.getNominalSize());
50 				}
51 			}
52 			else if(type.isMatrix())
53 			{
54 				switch(type.getNominalSize())
55 				{
56 				case 2:
57 					switch(type.getSecondarySize())
58 					{
59 					case 2: return GL_FLOAT_MAT2;
60 					case 3: return GL_FLOAT_MAT2x3;
61 					case 4: return GL_FLOAT_MAT2x4;
62 					default: UNREACHABLE(type.getSecondarySize());
63 					}
64 				case 3:
65 					switch(type.getSecondarySize())
66 					{
67 					case 2: return GL_FLOAT_MAT3x2;
68 					case 3: return GL_FLOAT_MAT3;
69 					case 4: return GL_FLOAT_MAT3x4;
70 					default: UNREACHABLE(type.getSecondarySize());
71 					}
72 				case 4:
73 					switch(type.getSecondarySize())
74 					{
75 					case 2: return GL_FLOAT_MAT4x2;
76 					case 3: return GL_FLOAT_MAT4x3;
77 					case 4: return GL_FLOAT_MAT4;
78 					default: UNREACHABLE(type.getSecondarySize());
79 					}
80 				default: UNREACHABLE(type.getNominalSize());
81 				}
82 			}
83 			else UNREACHABLE(0);
84 			break;
85 		case EbtInt:
86 			if(type.isScalar())
87 			{
88 				return GL_INT;
89 			}
90 			else if(type.isVector())
91 			{
92 				switch(type.getNominalSize())
93 				{
94 				case 2: return GL_INT_VEC2;
95 				case 3: return GL_INT_VEC3;
96 				case 4: return GL_INT_VEC4;
97 				default: UNREACHABLE(type.getNominalSize());
98 				}
99 			}
100 			else UNREACHABLE(0);
101 			break;
102 		case EbtUInt:
103 			if(type.isScalar())
104 			{
105 				return GL_UNSIGNED_INT;
106 			}
107 			else if(type.isVector())
108 			{
109 				switch(type.getNominalSize())
110 				{
111 				case 2: return GL_UNSIGNED_INT_VEC2;
112 				case 3: return GL_UNSIGNED_INT_VEC3;
113 				case 4: return GL_UNSIGNED_INT_VEC4;
114 				default: UNREACHABLE(type.getNominalSize());
115 				}
116 			}
117 			else UNREACHABLE(0);
118 			break;
119 		case EbtBool:
120 			if(type.isScalar())
121 			{
122 				return GL_BOOL;
123 			}
124 			else if(type.isVector())
125 			{
126 				switch(type.getNominalSize())
127 				{
128 				case 2: return GL_BOOL_VEC2;
129 				case 3: return GL_BOOL_VEC3;
130 				case 4: return GL_BOOL_VEC4;
131 				default: UNREACHABLE(type.getNominalSize());
132 				}
133 			}
134 			else UNREACHABLE(0);
135 			break;
136 		case EbtSampler2D:
137 			return GL_SAMPLER_2D;
138 		case EbtISampler2D:
139 			return GL_INT_SAMPLER_2D;
140 		case EbtUSampler2D:
141 			return GL_UNSIGNED_INT_SAMPLER_2D;
142 		case EbtSamplerCube:
143 			return GL_SAMPLER_CUBE;
144 		case EbtSampler2DRect:
145 			return GL_SAMPLER_2D_RECT_ARB;
146 		case EbtISamplerCube:
147 			return GL_INT_SAMPLER_CUBE;
148 		case EbtUSamplerCube:
149 			return GL_UNSIGNED_INT_SAMPLER_CUBE;
150 		case EbtSamplerExternalOES:
151 			return GL_SAMPLER_EXTERNAL_OES;
152 		case EbtSampler3D:
153 			return GL_SAMPLER_3D_OES;
154 		case EbtISampler3D:
155 			return GL_INT_SAMPLER_3D;
156 		case EbtUSampler3D:
157 			return GL_UNSIGNED_INT_SAMPLER_3D;
158 		case EbtSampler2DArray:
159 			return GL_SAMPLER_2D_ARRAY;
160 		case EbtISampler2DArray:
161 			return GL_INT_SAMPLER_2D_ARRAY;
162 		case EbtUSampler2DArray:
163 			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
164 		case EbtSampler2DShadow:
165 			return GL_SAMPLER_2D_SHADOW;
166 		case EbtSamplerCubeShadow:
167 			return GL_SAMPLER_CUBE_SHADOW;
168 		case EbtSampler2DArrayShadow:
169 			return GL_SAMPLER_2D_ARRAY_SHADOW;
170 		default:
171 			UNREACHABLE(type.getBasicType());
172 			break;
173 		}
174 
175 		return GL_NONE;
176 	}
177 
glVariablePrecision(const TType & type)178 	GLenum glVariablePrecision(const TType &type)
179 	{
180 		if(type.getBasicType() == EbtFloat)
181 		{
182 			switch(type.getPrecision())
183 			{
184 			case EbpHigh:   return GL_HIGH_FLOAT;
185 			case EbpMedium: return GL_MEDIUM_FLOAT;
186 			case EbpLow:    return GL_LOW_FLOAT;
187 			case EbpUndefined:
188 				// Should be defined as the default precision by the parser
189 			default: UNREACHABLE(type.getPrecision());
190 			}
191 		}
192 		else if(type.getBasicType() == EbtInt)
193 		{
194 			switch(type.getPrecision())
195 			{
196 			case EbpHigh:   return GL_HIGH_INT;
197 			case EbpMedium: return GL_MEDIUM_INT;
198 			case EbpLow:    return GL_LOW_INT;
199 			case EbpUndefined:
200 				// Should be defined as the default precision by the parser
201 			default: UNREACHABLE(type.getPrecision());
202 			}
203 		}
204 
205 		// Other types (boolean, sampler) don't have a precision
206 		return GL_NONE;
207 	}
208 }
209 
210 namespace glsl
211 {
212 	// Integer to TString conversion
str(int i)213 	TString str(int i)
214 	{
215 		char buffer[20];
216 		sprintf(buffer, "%d", i);
217 		return buffer;
218 	}
219 
220 	class Temporary : public TIntermSymbol
221 	{
222 	public:
Temporary(OutputASM * assembler)223 		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
224 		{
225 		}
226 
~Temporary()227 		~Temporary()
228 		{
229 			assembler->freeTemporary(this);
230 		}
231 
232 	private:
233 		OutputASM *const assembler;
234 	};
235 
236 	class Constant : public TIntermConstantUnion
237 	{
238 	public:
Constant(float x,float y,float z,float w)239 		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
240 		{
241 			constants[0].setFConst(x);
242 			constants[1].setFConst(y);
243 			constants[2].setFConst(z);
244 			constants[3].setFConst(w);
245 		}
246 
Constant(bool b)247 		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
248 		{
249 			constants[0].setBConst(b);
250 		}
251 
Constant(int i)252 		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
253 		{
254 			constants[0].setIConst(i);
255 		}
256 
~Constant()257 		~Constant()
258 		{
259 		}
260 
261 	private:
262 		ConstantUnion constants[4];
263 	};
264 
ShaderVariable(const TType & type,const std::string & name,int registerIndex)265 	ShaderVariable::ShaderVariable(const TType& type, const std::string& name, int registerIndex) :
266 		type(type.isStruct() ? GL_NONE : glVariableType(type)), precision(glVariablePrecision(type)),
267 		name(name), arraySize(type.getArraySize()), registerIndex(registerIndex)
268 	{
269 		if(type.isStruct())
270 		{
271 			for(const auto& field : type.getStruct()->fields())
272 			{
273 				fields.push_back(ShaderVariable(*(field->type()), field->name().c_str(), -1));
274 			}
275 		}
276 	}
277 
Uniform(const TType & type,const std::string & name,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)278 	Uniform::Uniform(const TType& type, const std::string &name, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
279 		ShaderVariable(type, name, registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
280 	{
281 	}
282 
UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)283 	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
284 	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
285 		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
286 		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
287 	{
288 	}
289 
BlockLayoutEncoder()290 	BlockLayoutEncoder::BlockLayoutEncoder()
291 		: mCurrentOffset(0)
292 	{
293 	}
294 
encodeType(const TType & type)295 	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
296 	{
297 		int arrayStride;
298 		int matrixStride;
299 
300 		bool isRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor;
301 		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
302 
303 		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
304 		                                 static_cast<int>(arrayStride * BytesPerComponent),
305 		                                 static_cast<int>(matrixStride * BytesPerComponent),
306 		                                 (matrixStride > 0) && isRowMajor);
307 
308 		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
309 
310 		return memberInfo;
311 	}
312 
313 	// static
getBlockRegister(const BlockMemberInfo & info)314 	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
315 	{
316 		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
317 	}
318 
319 	// static
getBlockRegisterElement(const BlockMemberInfo & info)320 	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
321 	{
322 		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
323 	}
324 
nextRegister()325 	void BlockLayoutEncoder::nextRegister()
326 	{
327 		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
328 	}
329 
Std140BlockEncoder()330 	Std140BlockEncoder::Std140BlockEncoder() : BlockLayoutEncoder()
331 	{
332 	}
333 
enterAggregateType()334 	void Std140BlockEncoder::enterAggregateType()
335 	{
336 		nextRegister();
337 	}
338 
exitAggregateType()339 	void Std140BlockEncoder::exitAggregateType()
340 	{
341 		nextRegister();
342 	}
343 
getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)344 	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
345 	{
346 		size_t baseAlignment = 0;
347 		int matrixStride = 0;
348 		int arrayStride = 0;
349 
350 		if(type.isMatrix())
351 		{
352 			baseAlignment = ComponentsPerRegister;
353 			matrixStride = ComponentsPerRegister;
354 
355 			if(arraySize > 0)
356 			{
357 				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
358 				arrayStride = ComponentsPerRegister * numRegisters;
359 			}
360 		}
361 		else if(arraySize > 0)
362 		{
363 			baseAlignment = ComponentsPerRegister;
364 			arrayStride = ComponentsPerRegister;
365 		}
366 		else
367 		{
368 			const size_t numComponents = type.getElementSize();
369 			baseAlignment = (numComponents == 3 ? 4u : numComponents);
370 		}
371 
372 		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
373 
374 		*matrixStrideOut = matrixStride;
375 		*arrayStrideOut = arrayStride;
376 	}
377 
advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)378 	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
379 	{
380 		if(arraySize > 0)
381 		{
382 			mCurrentOffset += arrayStride * arraySize;
383 		}
384 		else if(type.isMatrix())
385 		{
386 			ASSERT(matrixStride == ComponentsPerRegister);
387 			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
388 			mCurrentOffset += ComponentsPerRegister * numRegisters;
389 		}
390 		else
391 		{
392 			mCurrentOffset += type.getElementSize();
393 		}
394 	}
395 
Attribute()396 	Attribute::Attribute()
397 	{
398 		type = GL_NONE;
399 		arraySize = 0;
400 		registerIndex = 0;
401 	}
402 
Attribute(GLenum type,const std::string & name,int arraySize,int layoutLocation,int registerIndex)403 	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int layoutLocation, int registerIndex)
404 	{
405 		this->type = type;
406 		this->name = name;
407 		this->arraySize = arraySize;
408 		this->layoutLocation = layoutLocation;
409 		this->registerIndex = registerIndex;
410 	}
411 
getPixelShader() const412 	sw::PixelShader *Shader::getPixelShader() const
413 	{
414 		return nullptr;
415 	}
416 
getVertexShader() const417 	sw::VertexShader *Shader::getVertexShader() const
418 	{
419 		return nullptr;
420 	}
421 
TextureFunction(const TString & nodeName)422 	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
423 	{
424 		TString name = TFunction::unmangleName(nodeName);
425 
426 		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D" || name == "texture2DRect")
427 		{
428 			method = IMPLICIT;
429 		}
430 		else if(name == "texture2DProj" || name == "textureProj" || name == "texture2DRectProj")
431 		{
432 			method = IMPLICIT;
433 			proj = true;
434 		}
435 		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
436 		{
437 			method = LOD;
438 		}
439 		else if(name == "texture2DProjLod" || name == "textureProjLod")
440 		{
441 			method = LOD;
442 			proj = true;
443 		}
444 		else if(name == "textureSize")
445 		{
446 			method = SIZE;
447 		}
448 		else if(name == "textureOffset")
449 		{
450 			method = IMPLICIT;
451 			offset = true;
452 		}
453 		else if(name == "textureProjOffset")
454 		{
455 			method = IMPLICIT;
456 			offset = true;
457 			proj = true;
458 		}
459 		else if(name == "textureLodOffset")
460 		{
461 			method = LOD;
462 			offset = true;
463 		}
464 		else if(name == "textureProjLodOffset")
465 		{
466 			method = LOD;
467 			proj = true;
468 			offset = true;
469 		}
470 		else if(name == "texelFetch")
471 		{
472 			method = FETCH;
473 		}
474 		else if(name == "texelFetchOffset")
475 		{
476 			method = FETCH;
477 			offset = true;
478 		}
479 		else if(name == "textureGrad")
480 		{
481 			method = GRAD;
482 		}
483 		else if(name == "textureGradOffset")
484 		{
485 			method = GRAD;
486 			offset = true;
487 		}
488 		else if(name == "textureProjGrad")
489 		{
490 			method = GRAD;
491 			proj = true;
492 		}
493 		else if(name == "textureProjGradOffset")
494 		{
495 			method = GRAD;
496 			proj = true;
497 			offset = true;
498 		}
499 		else UNREACHABLE(0);
500 	}
501 
OutputASM(TParseContext & context,Shader * shaderObject)502 	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
503 	{
504 		shader = nullptr;
505 		pixelShader = nullptr;
506 		vertexShader = nullptr;
507 
508 		if(shaderObject)
509 		{
510 			shader = shaderObject->getShader();
511 			pixelShader = shaderObject->getPixelShader();
512 			vertexShader = shaderObject->getVertexShader();
513 		}
514 
515 		functionArray.push_back(Function(0, "main(", nullptr, nullptr));
516 		currentFunction = 0;
517 		outputQualifier = EvqOutput;   // Initialize outputQualifier to any value other than EvqFragColor or EvqFragData
518 	}
519 
~OutputASM()520 	OutputASM::~OutputASM()
521 	{
522 	}
523 
output()524 	void OutputASM::output()
525 	{
526 		if(shader)
527 		{
528 			emitShader(GLOBAL);
529 
530 			if(functionArray.size() > 1)   // Only call main() when there are other functions
531 			{
532 				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
533 				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
534 				callMain->dst.index = 0;   // main()
535 
536 				emit(sw::Shader::OPCODE_RET);
537 			}
538 
539 			emitShader(FUNCTION);
540 		}
541 	}
542 
emitShader(Scope scope)543 	void OutputASM::emitShader(Scope scope)
544 	{
545 		emitScope = scope;
546 		currentScope = GLOBAL;
547 		mContext.getTreeRoot()->traverse(this);
548 	}
549 
freeTemporary(Temporary * temporary)550 	void OutputASM::freeTemporary(Temporary *temporary)
551 	{
552 		free(temporaries, temporary);
553 	}
554 
getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const555 	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
556 	{
557 		TBasicType baseType = in->getType().getBasicType();
558 
559 		switch(op)
560 		{
561 		case sw::Shader::OPCODE_NEG:
562 			switch(baseType)
563 			{
564 			case EbtInt:
565 			case EbtUInt:
566 				return sw::Shader::OPCODE_INEG;
567 			case EbtFloat:
568 			default:
569 				return op;
570 			}
571 		case sw::Shader::OPCODE_ABS:
572 			switch(baseType)
573 			{
574 			case EbtInt:
575 				return sw::Shader::OPCODE_IABS;
576 			case EbtFloat:
577 			default:
578 				return op;
579 			}
580 		case sw::Shader::OPCODE_SGN:
581 			switch(baseType)
582 			{
583 			case EbtInt:
584 				return sw::Shader::OPCODE_ISGN;
585 			case EbtFloat:
586 			default:
587 				return op;
588 			}
589 		case sw::Shader::OPCODE_ADD:
590 			switch(baseType)
591 			{
592 			case EbtInt:
593 			case EbtUInt:
594 				return sw::Shader::OPCODE_IADD;
595 			case EbtFloat:
596 			default:
597 				return op;
598 			}
599 		case sw::Shader::OPCODE_SUB:
600 			switch(baseType)
601 			{
602 			case EbtInt:
603 			case EbtUInt:
604 				return sw::Shader::OPCODE_ISUB;
605 			case EbtFloat:
606 			default:
607 				return op;
608 			}
609 		case sw::Shader::OPCODE_MUL:
610 			switch(baseType)
611 			{
612 			case EbtInt:
613 			case EbtUInt:
614 				return sw::Shader::OPCODE_IMUL;
615 			case EbtFloat:
616 			default:
617 				return op;
618 			}
619 		case sw::Shader::OPCODE_DIV:
620 			switch(baseType)
621 			{
622 			case EbtInt:
623 				return sw::Shader::OPCODE_IDIV;
624 			case EbtUInt:
625 				return sw::Shader::OPCODE_UDIV;
626 			case EbtFloat:
627 			default:
628 				return op;
629 			}
630 		case sw::Shader::OPCODE_IMOD:
631 			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
632 		case sw::Shader::OPCODE_ISHR:
633 			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
634 		case sw::Shader::OPCODE_MIN:
635 			switch(baseType)
636 			{
637 			case EbtInt:
638 				return sw::Shader::OPCODE_IMIN;
639 			case EbtUInt:
640 				return sw::Shader::OPCODE_UMIN;
641 			case EbtFloat:
642 			default:
643 				return op;
644 			}
645 		case sw::Shader::OPCODE_MAX:
646 			switch(baseType)
647 			{
648 			case EbtInt:
649 				return sw::Shader::OPCODE_IMAX;
650 			case EbtUInt:
651 				return sw::Shader::OPCODE_UMAX;
652 			case EbtFloat:
653 			default:
654 				return op;
655 			}
656 		default:
657 			return op;
658 		}
659 	}
660 
visitSymbol(TIntermSymbol * symbol)661 	void OutputASM::visitSymbol(TIntermSymbol *symbol)
662 	{
663 		// The type of vertex outputs and fragment inputs with the same name must match (validated at link time),
664 		// so declare them but don't assign a register index yet (one will be assigned when referenced in reachable code).
665 		switch(symbol->getQualifier())
666 		{
667 		case EvqVaryingIn:
668 		case EvqVaryingOut:
669 		case EvqInvariantVaryingIn:
670 		case EvqInvariantVaryingOut:
671 		case EvqVertexOut:
672 		case EvqFragmentIn:
673 			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
674 			{
675 				declareVarying(symbol, -1);
676 			}
677 			break;
678 		case EvqFragmentOut:
679 			declareFragmentOutput(symbol);
680 			break;
681 		default:
682 			break;
683 		}
684 
685 		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
686 		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
687 		// "All members of a named uniform block declared with a shared or std140 layout qualifier
688 		// are considered active, even if they are not referenced in any shader in the program.
689 		// The uniform block itself is also considered active, even if no member of the block is referenced."
690 		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
691 		{
692 			uniformRegister(symbol);
693 		}
694 	}
695 
	// Emits instructions for a binary node.
	//
	// The traverser calls this with 'visit' set to PreVisit, InVisit or
	// PostVisit. Cases that need control over operand order traverse the
	// operands themselves during PreVisit and return false so the children are
	// not traversed again; the remaining cases emit on PostVisit (or on
	// InVisit/PostVisit for short-circuit logic) and return true.
	//
	// Nothing is emitted while currentScope differs from emitScope, nor for
	// nodes that isSamplerRegister() identifies as sampler registers.
	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
	{
		if(currentScope != emitScope)
		{
			return false;
		}

		// The node itself doubles as the destination of the emitted instructions.
		TIntermTyped *result = node;
		TIntermTyped *left = node->getLeft();
		TIntermTyped *right = node->getRight();
		const TType &leftType = left->getType();
		const TType &rightType = right->getType();

		if(isSamplerRegister(result))
		{
			return false;   // Don't traverse, the register index is determined statically
		}

		switch(node->getOp())
		{
		case EOpAssign:
			assert(visit == PreVisit);
			// Evaluate the right-hand side, store it, and make the value
			// available as the result of the assignment expression.
			right->traverse(this);
			assignLvalue(left, right);
			copy(result, right);
			return false;
		case EOpInitialize:
			assert(visit == PreVisit);
			// Constant arrays go into the constant register file.
			if(leftType.getQualifier() == EvqConstExpr && leftType.isArray() && leftType.getArraySize() > 1)
			{
				// One DEF per register of the array.
				for(int i = 0; i < left->totalRegisterCount(); i++)
				{
					emit(sw::Shader::OPCODE_DEF, left, i, right, i);
				}
			}
			else
			{
				right->traverse(this);
				copy(left, right);
			}
			return false;
		case EOpMatrixTimesScalarAssign:
			assert(visit == PreVisit);
			right->traverse(this);
			// Multiply each of the leftType.getNominalSize() registers of the matrix by the scalar.
			for(int i = 0; i < leftType.getNominalSize(); i++)
			{
				emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
			}

			assignLvalue(left, result);
			return false;
		case EOpVectorTimesMatrixAssign:
			assert(visit == PreVisit);
			{
				// The left operand may contain a swizzle serving double-duty as
				// swizzle and writemask, so it's important that we traverse it
				// first. Otherwise we may end up never setting up our left
				// operand correctly.
				left->traverse(this);
				right->traverse(this);
				int size = leftType.getNominalSize();

				// One dot product per result component, against register i of the matrix.
				for(int i = 0; i < size; i++)
				{
					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
					dot->dst.mask = 1 << i;   // Destination mask selects bit i only
				}

				assignLvalue(left, result);
			}
			return false;
		case EOpMatrixTimesMatrixAssign:
			assert(visit == PreVisit);
			{
				right->traverse(this);
				int dim = leftType.getNominalSize();

				// Result register i is built up as a sum of left registers, each
				// scaled by one component of right register i (MUL for j = 0,
				// then MAD accumulating into the result for j >= 1).
				for(int i = 0; i < dim; i++)
				{
					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
					mul->src[1].swizzle = 0x00;

					for(int j = 1; j < dim; j++)
					{
						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
						// j * 0x55 places the 2-bit index j in all four swizzle fields.
						mad->src[1].swizzle = j * 0x55;
					}
				}

				assignLvalue(left, result);
			}
			return false;
		case EOpIndexDirect:
		case EOpIndexIndirect:
		case EOpIndexDirectStruct:
		case EOpIndexDirectInterfaceBlock:
			assert(visit == PreVisit);
			evaluateRvalue(node);
			return false;
		case EOpVectorSwizzle:
			if(visit == PostVisit)
			{
				int swizzle = 0;
				// The right operand is expected to be an aggregate of constant component indices.
				TIntermAggregate *components = right->getAsAggregate();

				if(components)
				{
					TIntermSequence &sequence = components->getSequence();
					int component = 0;

					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
					{
						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();

						if(element)
						{
							// Pack each selected index into two bits of the swizzle.
							int i = element->getUnionArrayPointer()[0].getIConst();
							swizzle |= i << (component * 2);
							component++;
						}
						else UNREACHABLE(0);
					}
				}
				else UNREACHABLE(0);

				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
				mov->src[0].swizzle = swizzle;
			}
			break;
		// Arithmetic and bitwise operators: the opcode is specialized for the
		// result type through getOpcode() where an integer variant exists.
		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
		case EOpEqual:
			if(visit == PostVisit)
			{
				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);

				// Operands spanning several registers: compare each further
				// register and AND the outcome into the result.
				for(int index = 1; index < left->totalRegisterCount(); index++)
				{
					Temporary equal(this);
					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
					emit(sw::Shader::OPCODE_AND, result, result, &equal);
				}
			}
			break;
		case EOpNotEqual:
			if(visit == PostVisit)
			{
				emitBinary(sw::Shader::OPCODE_NE, result, left, right);

				// Operands spanning several registers: compare each further
				// register and OR the outcome into the result.
				for(int index = 1; index < left->totalRegisterCount(); index++)
				{
					Temporary notEqual(this);
					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
				}
			}
			break;
		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
		case EOpMatrixTimesScalar:
			if(visit == PostVisit)
			{
				// Either operand may be the matrix; multiply each of its registers by the scalar.
				if(left->isMatrix())
				{
					for(int i = 0; i < leftType.getNominalSize(); i++)
					{
						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
					}
				}
				else if(right->isMatrix())
				{
					for(int i = 0; i < rightType.getNominalSize(); i++)
					{
						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
					}
				}
				else UNREACHABLE(0);
			}
			break;
		case EOpVectorTimesMatrix:
			if(visit == PostVisit)
			{
				// The dot product width follows the vector's size.
				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());

				int size = rightType.getNominalSize();
				for(int i = 0; i < size; i++)
				{
					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
					dot->dst.mask = 1 << i;   // Destination mask selects bit i only
				}
			}
			break;
		case EOpMatrixTimesVector:
			if(visit == PostVisit)
			{
				// Sum of matrix registers, each scaled by one component of the
				// vector: MUL for the first, MAD accumulating for the rest.
				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
				mul->src[1].swizzle = 0x00;

				int size = rightType.getNominalSize();
				for(int i = 1; i < size; i++)
				{
					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
					// i * 0x55 places the 2-bit index i in all four swizzle fields.
					mad->src[1].swizzle = i * 0x55;
				}
			}
			break;
		case EOpMatrixTimesMatrix:
			if(visit == PostVisit)
			{
				int dim = leftType.getNominalSize();

				// Same MUL/MAD accumulation as EOpMatrixTimesMatrixAssign, but the
				// number of result registers follows the right operand.
				int size = rightType.getNominalSize();
				for(int i = 0; i < size; i++)
				{
					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
					mul->src[1].swizzle = 0x00;

					for(int j = 1; j < dim; j++)
					{
						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
						mad->src[1].swizzle = j * 0x55;
					}
				}
			}
			break;
		case EOpLogicalOr:
			// When trivial() accepts the right operand, evaluate both sides
			// and combine them with a plain OR instead of branching.
			if(trivial(right, 6))
			{
				if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_OR, result, left, right);
				}
			}
			else   // Short-circuit evaluation
			{
				if(visit == InVisit)
				{
					// Between the operands: result = left; the right operand is
					// emitted inside an IF on the negated result.
					emit(sw::Shader::OPCODE_MOV, result, left);
					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
				}
				else if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_MOV, result, right);
					emit(sw::Shader::OPCODE_ENDIF);
				}
			}
			break;
		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
		case EOpLogicalAnd:
			// Same structure as EOpLogicalOr, with AND and a non-negated IF.
			if(trivial(right, 6))
			{
				if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_AND, result, left, right);
				}
			}
			else   // Short-circuit evaluation
			{
				if(visit == InVisit)
				{
					emit(sw::Shader::OPCODE_MOV, result, left);
					emit(sw::Shader::OPCODE_IF, 0, result);
				}
				else if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_MOV, result, right);
					emit(sw::Shader::OPCODE_ENDIF);
				}
			}
			break;
		default: UNREACHABLE(node->getOp());
		}

		return true;
	}
995 
emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)996 	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
997 	{
998 		switch(size)
999 		{
1000 		case 1: // Used for cofactor computation only
1001 			{
1002 				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1003 				bool isMov = (row == col);
1004 				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
1005 				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
1006 				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
1007 				mov->dst.mask = 1 << outRow;
1008 			}
1009 			break;
1010 		case 2:
1011 			{
1012 				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
1013 
1014 				bool isCofactor = (col >= 0) && (row >= 0);
1015 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
1016 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
1017 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
1018 
1019 				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
1020 				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
1021 				det->dst.mask = 1 << outRow;
1022 			}
1023 			break;
1024 		case 3:
1025 			{
1026 				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
1027 
1028 				bool isCofactor = (col >= 0) && (row >= 0);
1029 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
1030 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
1031 				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
1032 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
1033 
1034 				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
1035 				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
1036 				det->dst.mask = 1 << outRow;
1037 			}
1038 			break;
1039 		case 4:
1040 			{
1041 				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
1042 				det->dst.mask = 1 << outRow;
1043 			}
1044 			break;
1045 		default:
1046 			UNREACHABLE(size);
1047 			break;
1048 		}
1049 	}
1050 
visitUnary(Visit visit,TIntermUnary * node)1051 	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
1052 	{
1053 		if(currentScope != emitScope)
1054 		{
1055 			return false;
1056 		}
1057 
1058 		TIntermTyped *result = node;
1059 		TIntermTyped *arg = node->getOperand();
1060 		TBasicType basicType = arg->getType().getBasicType();
1061 
1062 		union
1063 		{
1064 			float f;
1065 			int i;
1066 		} one_value;
1067 
1068 		if(basicType == EbtInt || basicType == EbtUInt)
1069 		{
1070 			one_value.i = 1;
1071 		}
1072 		else
1073 		{
1074 			one_value.f = 1.0f;
1075 		}
1076 
1077 		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
1078 		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
1079 		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
1080 
1081 		switch(node->getOp())
1082 		{
1083 		case EOpNegative:
1084 			if(visit == PostVisit)
1085 			{
1086 				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
1087 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1088 				{
1089 					emit(negOpcode, result, index, arg, index);
1090 				}
1091 			}
1092 			break;
1093 		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1094 		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1095 		case EOpBitwiseNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1096 		case EOpPostIncrement:
1097 			if(visit == PostVisit)
1098 			{
1099 				copy(result, arg);
1100 
1101 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
1102 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1103 				{
1104 					emit(addOpcode, arg, index, arg, index, &one);
1105 				}
1106 
1107 				assignLvalue(arg, arg);
1108 			}
1109 			break;
1110 		case EOpPostDecrement:
1111 			if(visit == PostVisit)
1112 			{
1113 				copy(result, arg);
1114 
1115 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
1116 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1117 				{
1118 					emit(subOpcode, arg, index, arg, index, &one);
1119 				}
1120 
1121 				assignLvalue(arg, arg);
1122 			}
1123 			break;
1124 		case EOpPreIncrement:
1125 			if(visit == PostVisit)
1126 			{
1127 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
1128 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1129 				{
1130 					emit(addOpcode, result, index, arg, index, &one);
1131 				}
1132 
1133 				assignLvalue(arg, result);
1134 			}
1135 			break;
1136 		case EOpPreDecrement:
1137 			if(visit == PostVisit)
1138 			{
1139 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
1140 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1141 				{
1142 					emit(subOpcode, result, index, arg, index, &one);
1143 				}
1144 
1145 				assignLvalue(arg, result);
1146 			}
1147 			break;
1148 		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
1149 		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
1150 		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
1151 		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
1152 		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
1153 		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
1154 		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
1155 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
1156 		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
1157 		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
1158 		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
1159 		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
1160 		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
1161 		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
1162 		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
1163 		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
1164 		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
1165 		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
1166 		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
1167 		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
1168 		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
1169 		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
1170 		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
1171 		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
1172 		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
1173 		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
1174 		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;
1175 		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
1176 		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
1177 		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
1178 		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
1179 		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
1180 		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
1181 		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
1182 		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
1183 		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
1184 		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
1185 		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
1186 		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
1187 		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
1188 		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
1189 		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
1190 		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
1191 		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
1192 		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
1193 		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
1194 		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
1195 		case EOpTranspose:
1196 			if(visit == PostVisit)
1197 			{
1198 				int numCols = arg->getNominalSize();
1199 				int numRows = arg->getSecondarySize();
1200 				for(int i = 0; i < numCols; ++i)
1201 				{
1202 					for(int j = 0; j < numRows; ++j)
1203 					{
1204 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
1205 						mov->src[0].swizzle = 0x55 * j;
1206 						mov->dst.mask = 1 << i;
1207 					}
1208 				}
1209 			}
1210 			break;
1211 		case EOpDeterminant:
1212 			if(visit == PostVisit)
1213 			{
1214 				int size = arg->getNominalSize();
1215 				ASSERT(size == arg->getSecondarySize());
1216 
1217 				emitDeterminant(result, arg, size);
1218 			}
1219 			break;
1220 		case EOpInverse:
1221 			if(visit == PostVisit)
1222 			{
1223 				int size = arg->getNominalSize();
1224 				ASSERT(size == arg->getSecondarySize());
1225 
1226 				// Compute transposed matrix of cofactors
1227 				for(int i = 0; i < size; ++i)
1228 				{
1229 					for(int j = 0; j < size; ++j)
1230 					{
1231 						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1232 						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
1233 						emitDeterminant(result, arg, size - 1, j, i, i, j);
1234 					}
1235 				}
1236 
1237 				// Compute 1 / determinant
1238 				Temporary invDet(this);
1239 				emitDeterminant(&invDet, arg, size);
1240 				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
1241 				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
1242 				div->src[1].swizzle = 0x00; // xxxx
1243 
1244 				// Divide transposed matrix of cofactors by determinant
1245 				for(int i = 0; i < size; ++i)
1246 				{
1247 					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
1248 				}
1249 			}
1250 			break;
1251 		default: UNREACHABLE(node->getOp());
1252 		}
1253 
1254 		return true;
1255 	}
1256 
	// Traversal callback for aggregate nodes: sequences, declarations, function
	// definitions and calls, constructors, and built-ins taking several arguments.
	// Emits the shader instructions for the node's operator; apart from EOpFunction,
	// which also acts on PreVisit, instructions are emitted on PostVisit only.
	//
	// Returns false without emitting anything when the current scope is not the scope
	// being emitted (function and sequence nodes are always processed), and also when a
	// called user-defined function cannot be found (after reporting an error).
	// Returns true otherwise.
	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
	{
		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
		{
			return false;
		}

		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);

		// The node itself serves as the destination of the emitted instructions
		TIntermTyped *result = node;
		const TType &resultType = node->getType();
		TIntermSequence &arg = node->getSequence();
		int argumentCount = static_cast<int>(arg.size());

		switch(node->getOp())
		{
		// Nothing is emitted for these operators
		case EOpSequence:             break;
		case EOpDeclaration:          break;
		case EOpInvariantDeclaration: break;
		case EOpPrototype:            break;
		case EOpComma:
			if(visit == PostVisit)
			{
				// The result of the node is its second operand
				copy(result, arg[1]);
			}
			break;
		case EOpFunction:
			if(visit == PreVisit)
			{
				const TString &name = node->getName();

				if(emitScope == FUNCTION)
				{
					if(functionArray.size() > 1)   // No need for a label when there's only main()
					{
						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
						label->dst.type = sw::Shader::PARAMETER_LABEL;

						const Function *function = findFunction(name);
						ASSERT(function);   // Should have been added during global pass
						label->dst.index = function->label;
						currentFunction = function->label;
					}
				}
				else if(emitScope == GLOBAL)
				{
					// Register every function other than main; the new entry's first
					// constructor argument is its index in functionArray
					if(name != "main(")
					{
						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
					}
				}
				else UNREACHABLE(emitScope);

				// The function body is visited in function scope
				currentScope = FUNCTION;
			}
			else if(visit == PostVisit)
			{
				if(emitScope == FUNCTION)
				{
					if(functionArray.size() > 1)   // No need to return when there's only main()
					{
						emit(sw::Shader::OPCODE_RET);
					}
				}

				currentScope = GLOBAL;
			}
			break;
		case EOpFunctionCall:
			if(visit == PostVisit)
			{
				if(node->isUserDefined())
				{
					const TString &name = node->getName();
					const Function *function = findFunction(name);

					if(!function)
					{
						mContext.error(node->getLine(), "function definition not found", name.c_str());
						return false;
					}

					// Parameters declared by the function definition
					TIntermSequence &arguments = *function->arg;

					// Copy the call's arguments into the in, inout and const parameters
					for(int i = 0; i < argumentCount; i++)
					{
						TIntermTyped *in = arguments[i]->getAsTyped();

						if(in->getQualifier() == EvqIn ||
						   in->getQualifier() == EvqInOut ||
						   in->getQualifier() == EvqConstReadOnly)
						{
							copy(in, arg[i]);
						}
					}

					Instruction *call = emit(sw::Shader::OPCODE_CALL);
					call->dst.type = sw::Shader::PARAMETER_LABEL;
					call->dst.index = function->label;

					// Fetch the return value, if the function has a non-void one
					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
					{
						copy(result, function->ret);
					}

					// Write the out and inout parameters back to the call's arguments
					for(int i = 0; i < argumentCount; i++)
					{
						TIntermTyped *argument = arguments[i]->getAsTyped();
						TIntermTyped *out = arg[i]->getAsTyped();

						if(argument->getQualifier() == EvqOut ||
						   argument->getQualifier() == EvqInOut)
						{
							assignLvalue(out, argument);
						}
					}
				}
				else
				{
					// Not user-defined: handled as a texture built-in. The TextureFunction
					// built from the call's name supplies the 'proj', 'offset' and 'method'
					// properties used below.
					const TextureFunction textureFunction(node->getName());
					TIntermTyped *s = arg[0]->getAsTyped();   // First argument; its basic type is tested for shadow samplers below
					TIntermTyped *t = arg[1]->getAsTyped();   // Second argument; the coordinate except for the SIZE method

					Temporary coord(this);

					if(textureFunction.proj)
					{
						// coord.xyz = RCPX of the last component of arg[1] (presumably its
						// reciprocal), then multiplied by arg[1]
						Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
						rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
						rcp->dst.mask = 0x7;

						Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
						mul->dst.mask = 0x7;

						if(IsShadowSampler(s->getBasicType()))
						{
							ASSERT(s->getBasicType() == EbtSampler2DShadow);
							Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, &coord);
							mov->src[0].swizzle = 0xA4;   // xyzz
						}
					}
					else
					{
						Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);

						if(IsShadowSampler(s->getBasicType()) && t->getNominalSize() == 3)
						{
							ASSERT(s->getBasicType() == EbtSampler2DShadow);
							mov->src[0].swizzle = 0xA4;   // xyzz
						}
					}

					// Select the sampling instruction from the method, the offset flag and
					// the number of arguments
					switch(textureFunction.method)
					{
					case TextureFunction::IMPLICIT:
						if(!textureFunction.offset)
						{
							if(argumentCount == 2)
							{
								emit(sw::Shader::OPCODE_TEX, result, &coord, s);
							}
							else if(argumentCount == 3)   // Bias
							{
								emit(sw::Shader::OPCODE_TEXBIAS, result, &coord, s, arg[2]);
							}
							else UNREACHABLE(argumentCount);
						}
						else   // Offset
						{
							if(argumentCount == 3)
							{
								emit(sw::Shader::OPCODE_TEXOFFSET, result, &coord, s, arg[2]);
							}
							else if(argumentCount == 4)   // Bias
							{
								emit(sw::Shader::OPCODE_TEXOFFSETBIAS, result, &coord, s, arg[2], arg[3]);
							}
							else UNREACHABLE(argumentCount);
						}
						break;
					case TextureFunction::LOD:
						if(!textureFunction.offset && argumentCount == 3)
						{
							emit(sw::Shader::OPCODE_TEXLOD, result, &coord, s, arg[2]);
						}
						else if(argumentCount == 4)   // Offset
						{
							// Note the source order: arg[3] is passed before arg[2]
							emit(sw::Shader::OPCODE_TEXLODOFFSET, result, &coord, s, arg[3], arg[2]);
						}
						else UNREACHABLE(argumentCount);
						break;
					case TextureFunction::FETCH:
						if(!textureFunction.offset && argumentCount == 3)
						{
							emit(sw::Shader::OPCODE_TEXELFETCH, result, &coord, s, arg[2]);
						}
						else if(argumentCount == 4)   // Offset
						{
							// Note the source order: arg[3] is passed before arg[2]
							emit(sw::Shader::OPCODE_TEXELFETCHOFFSET, result, &coord, s, arg[3], arg[2]);
						}
						else UNREACHABLE(argumentCount);
						break;
					case TextureFunction::GRAD:
						if(!textureFunction.offset && argumentCount == 4)
						{
							emit(sw::Shader::OPCODE_TEXGRAD, result, &coord, s, arg[2], arg[3]);
						}
						else if(argumentCount == 5)   // Offset
						{
							emit(sw::Shader::OPCODE_TEXGRADOFFSET, result, &coord, s, arg[2], arg[3], arg[4]);
						}
						else UNREACHABLE(argumentCount);
						break;
					case TextureFunction::SIZE:
						// Uses arg[1] directly rather than the 'coord' temporary
						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], s);
						break;
					default:
						UNREACHABLE(textureFunction.method);
					}
				}
			}
			break;
		case EOpParameters:
			break;
		// Scalar and vector constructors
		case EOpConstructFloat:
		case EOpConstructVec2:
		case EOpConstructVec3:
		case EOpConstructVec4:
		case EOpConstructBool:
		case EOpConstructBVec2:
		case EOpConstructBVec3:
		case EOpConstructBVec4:
		case EOpConstructInt:
		case EOpConstructIVec2:
		case EOpConstructIVec3:
		case EOpConstructIVec4:
		case EOpConstructUInt:
		case EOpConstructUVec2:
		case EOpConstructUVec3:
		case EOpConstructUVec4:
			if(visit == PostVisit)
			{
				// 'component' counts the components consumed so far. It selects the
				// destination register ('arrayIndex', clamped to the last array element)
				// and the first component written within it ('swizzle').
				int component = 0;
				int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0;
				int arrayComponents = result->getType().getElementSize();
				for(int i = 0; i < argumentCount; i++)
				{
					TIntermTyped *argi = arg[i]->getAsTyped();
					int size = argi->getNominalSize();
					int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex);
					int swizzle = component - (arrayIndex * arrayComponents);

					if(!argi->isMatrix())
					{
						// Write from component 'swizzle' upwards, with the source swizzle
						// shifted by the same number of components (two bits each)
						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
						mov->dst.mask = (0xF << swizzle) & 0xF;
						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);

						component += size;
					}
					else if(!result->isMatrix()) // Construct a non matrix from a matrix
					{
						// First column of the matrix argument
						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
						mov->dst.mask = (0xF << swizzle) & 0xF;
						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);

						// At most one more instruction when constructing a vec3 from a mat2 or a vec4 from a mat2/mat3
						if(result->getNominalSize() > size)
						{
							// Remaining components come from the second column
							Instruction *mov = emitCast(result, arrayIndex, argi, 1);
							mov->dst.mask = (0xF << (swizzle + size)) & 0xF;
							// mat2: xxxy (0x40), mat3: xxxx (0x00)
							mov->src[0].swizzle = ((size == 2) ? 0x40 : 0x00) << (swizzle * 2);
						}

						component += size;
					}
					else   // Matrix
					{
						int column = 0;

						// One instruction per source column until the result's nominal
						// size has been covered
						while(component < resultType.getNominalSize())
						{
							Instruction *mov = emitCast(result, arrayIndex, argi, column);
							mov->dst.mask = (0xF << swizzle) & 0xF;
							mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);

							column++;
							component += size;
						}
					}
				}
			}
			break;
		// Matrix constructors
		case EOpConstructMat2:
		case EOpConstructMat2x3:
		case EOpConstructMat2x4:
		case EOpConstructMat3x2:
		case EOpConstructMat3:
		case EOpConstructMat3x4:
		case EOpConstructMat4x2:
		case EOpConstructMat4x3:
		case EOpConstructMat4:
			if(visit == PostVisit)
			{
				TIntermTyped *arg0 = arg[0]->getAsTyped();
				const int outCols = result->getNominalSize();
				const int outRows = result->getSecondarySize();

				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
				{
					for(int i = 0; i < outCols; i++)
					{
						// Clear the column first
						emit(sw::Shader::OPCODE_MOV, result, i, &zero);
						if (i < outRows)
						{
							// Insert the scalar value on the main diagonal.
							// For non-square matrices, avoid emitting in
							// a column which doesn't /have/ a main diagonal
							// element, even though it would be fairly benign --
							// it's not necessarily trivial for downstream
							// passes to see that this is redundant and strip it
							// out.
							Instruction *mov = emitCast(result, i, arg0, 0);
							mov->dst.mask = 1 << i;
							ASSERT(mov->src[0].swizzle == 0x00);
						}
					}
				}
				else if(arg0->isMatrix())
				{
					// Matrix (or array of matrices) from matrix arguments: argument n
					// fills the registers starting at n * outCols
					int arraySize = result->isArray() ? result->getArraySize() : 1;

					for(int n = 0; n < arraySize; n++)
					{
						TIntermTyped *argi = arg[n]->getAsTyped();
						const int inCols = argi->getNominalSize();
						const int inRows = argi->getSecondarySize();

						for(int i = 0; i < outCols; i++)
						{
							// Columns the source does not cover entirely start out as
							// identity columns
							if(i >= inCols || outRows > inRows)
							{
								// Initialize to identity matrix
								Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
								emitCast(result, i + n * outCols, &col, 0);
							}

							if(i < inCols)
							{
								// Copy the source column; the mask has its low inRows bits set
								Instruction *mov = emitCast(result, i + n * outCols, argi, i);
								mov->dst.mask = 0xF >> (4 - inRows);
							}
						}
					}
				}
				else
				{
					// Matrix from scalars and/or vectors: the argument components are
					// written in order, filling each column before moving to the next.
					// 'column' and 'row' track the next position to write.
					int column = 0;
					int row = 0;

					for(int i = 0; i < argumentCount; i++)
					{
						TIntermTyped *argi = arg[i]->getAsTyped();
						int size = argi->getNominalSize();
						int element = 0;   // Next component of this argument to consume

						// An argument that does not fit in the rest of the current
						// column takes another iteration for the next column
						while(element < size)
						{
							Instruction *mov = emitCast(result, column, argi, 0);
							mov->dst.mask = (0xF << row) & 0xF;
							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;

							int end = row + size - element;
							column = end >= outRows ? column + 1 : column;
							element = element + outRows - row;
							row = end >= outRows ? 0 : end;
						}
					}
				}
			}
			break;
		case EOpConstructStruct:
			if(visit == PostVisit)
			{
				// Copy each argument register by register; 'offset' is the register
				// index within the result at which the current argument starts
				int offset = 0;
				for(int i = 0; i < argumentCount; i++)
				{
					TIntermTyped *argi = arg[i]->getAsTyped();
					int size = argi->totalRegisterCount();

					for(int index = 0; index < size; index++)
					{
						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
						mov->dst.mask = writeMask(result, offset + index);
					}

					offset += size;
				}
			}
			break;
		// Component-wise comparisons
		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
		case EOpModf:
			if(visit == PostVisit)
			{
				// The truncated first argument is stored through the second argument;
				// the result is the first argument minus that truncated value
				TIntermTyped* arg1 = arg[1]->getAsTyped();
				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
				assignLvalue(arg1, arg1);
				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
			}
			break;
		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
		case EOpClamp:
			if(visit == PostVisit)
			{
				// MAX with arg[1], then MIN of that result with arg[2]
				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
			}
			break;
		case EOpMix:
			if(visit == PostVisit)
			{
				// A boolean third argument selects between the operands instead of
				// blending them; both instructions take the arguments in reverse order
				if(arg[2]->getAsTyped()->getBasicType() == EbtBool)
				{
					emit(sw::Shader::OPCODE_SELECT, result, arg[2], arg[1], arg[0]);
				}
				else
				{
					emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]);
				}
			}
			break;
		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
		case EOpMul:
			if(visit == PostVisit)
			{
				// Register-by-register multiply of two operands with equal dimensions
				TIntermTyped *arg0 = arg[0]->getAsTyped();
				ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) &&
				       (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize()));

				int size = arg0->getNominalSize();
				for(int i = 0; i < size; i++)
				{
					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
				}
			}
			break;
		case EOpOuterProduct:
			if(visit == PostVisit)
			{
				// Result register i is arg[0] multiplied by component i of arg[1]
				for(int i = 0; i < dim(arg[1]); i++)
				{
					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
					mul->src[1].swizzle = 0x55 * i;
				}
			}
			break;
		default: UNREACHABLE(node->getOp());
		}

		return true;
	}
1736 
visitSelection(Visit visit,TIntermSelection * node)1737 	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
1738 	{
1739 		if(currentScope != emitScope)
1740 		{
1741 			return false;
1742 		}
1743 
1744 		TIntermTyped *condition = node->getCondition();
1745 		TIntermNode *trueBlock = node->getTrueBlock();
1746 		TIntermNode *falseBlock = node->getFalseBlock();
1747 		TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
1748 
1749 		condition->traverse(this);
1750 
1751 		if(node->usesTernaryOperator())
1752 		{
1753 			if(constantCondition)
1754 			{
1755 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1756 
1757 				if(trueCondition)
1758 				{
1759 					trueBlock->traverse(this);
1760 					copy(node, trueBlock);
1761 				}
1762 				else
1763 				{
1764 					falseBlock->traverse(this);
1765 					copy(node, falseBlock);
1766 				}
1767 			}
1768 			else if(trivial(node, 6))   // Fast to compute both potential results and no side effects
1769 			{
1770 				trueBlock->traverse(this);
1771 				falseBlock->traverse(this);
1772 				emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);
1773 			}
1774 			else
1775 			{
1776 				emit(sw::Shader::OPCODE_IF, 0, condition);
1777 
1778 				if(trueBlock)
1779 				{
1780 					trueBlock->traverse(this);
1781 					copy(node, trueBlock);
1782 				}
1783 
1784 				if(falseBlock)
1785 				{
1786 					emit(sw::Shader::OPCODE_ELSE);
1787 					falseBlock->traverse(this);
1788 					copy(node, falseBlock);
1789 				}
1790 
1791 				emit(sw::Shader::OPCODE_ENDIF);
1792 			}
1793 		}
1794 		else  // if/else statement
1795 		{
1796 			if(constantCondition)
1797 			{
1798 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1799 
1800 				if(trueCondition)
1801 				{
1802 					if(trueBlock)
1803 					{
1804 						trueBlock->traverse(this);
1805 					}
1806 				}
1807 				else
1808 				{
1809 					if(falseBlock)
1810 					{
1811 						falseBlock->traverse(this);
1812 					}
1813 				}
1814 			}
1815 			else
1816 			{
1817 				emit(sw::Shader::OPCODE_IF, 0, condition);
1818 
1819 				if(trueBlock)
1820 				{
1821 					trueBlock->traverse(this);
1822 				}
1823 
1824 				if(falseBlock)
1825 				{
1826 					emit(sw::Shader::OPCODE_ELSE);
1827 					falseBlock->traverse(this);
1828 				}
1829 
1830 				emit(sw::Shader::OPCODE_ENDIF);
1831 			}
1832 		}
1833 
1834 		return false;
1835 	}
1836 
	// Emits shader instructions for a loop statement.
	//
	// Nothing is emitted when LoopInfo reports zero iterations. A loop that is not a do-while
	// and has at most four iterations is unrolled by emitting its body and expression once
	// per iteration. Any other loop becomes a WHILE ... TEST ... ENDWHILE sequence.
	//
	// While a deterministic loop is being emitted, the id of its index variable is kept in
	// deterministicVariables; it is removed again before returning.
	// Returns false in every case; the loop's children are traversed explicitly here.
	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
	{
		if(currentScope != emitScope)
		{
			return false;
		}

		LoopInfo loop(node);

		// A loop that never executes generates no code at all.
		if(loop.iterations == 0)
		{
			return false;
		}

		bool unroll = (loop.iterations <= 4);

		TIntermNode *init = node->getInit();
		TIntermTyped *condition = node->getCondition();
		TIntermTyped *expression = node->getExpression();
		TIntermNode *body = node->getBody();
		Constant True(true);

		if(loop.isDeterministic())
		{
			 deterministicVariables.insert(loop.index->getId());

			 if(!unroll)
			 {
				 emit(sw::Shader::OPCODE_SCALAR);   // Unrolled loops don't have an ENDWHILE to disable scalar mode.
			 }
		}

		if(node->getType() == ELoopDoWhile)
		{
			// Emulate do-while with a WHILE loop controlled by a temporary that starts out
			// true, so the body runs at least once; the real condition is evaluated after
			// the body and stored into that temporary.
			Temporary iterate(this);
			emit(sw::Shader::OPCODE_MOV, &iterate, &True);

			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while

			if(body)
			{
				body->traverse(this);
			}

			emit(sw::Shader::OPCODE_TEST);

			condition->traverse(this);
			emit(sw::Shader::OPCODE_MOV, &iterate, condition);

			emit(sw::Shader::OPCODE_ENDWHILE);
		}
		else
		{
			if(init)
			{
				init->traverse(this);
			}

			if(unroll)
			{
				mContext.info(node->getLine(), "loop unrolled", "for");

				// Repeat body + expression once per iteration; the condition is not emitted.
				for(unsigned int i = 0; i < loop.iterations; i++)
				{
				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop

					if(body)
					{
						body->traverse(this);
					}

					if(expression)
					{
						expression->traverse(this);
					}
				}
			}
			else
			{
				if(condition)
				{
					condition->traverse(this);
				}
				else
				{
					// No condition given: loop on the constant 'true'.
					condition = &True;
				}

				emit(sw::Shader::OPCODE_WHILE, 0, condition);

				if(body)
				{
					body->traverse(this);
				}

				emit(sw::Shader::OPCODE_TEST);

				if(loop.isDeterministic())
				{
					emit(sw::Shader::OPCODE_SCALAR);
				}

				if(expression)
				{
					expression->traverse(this);
				}

				// Re-evaluate the condition at the end of each iteration, after the expression.
				if(condition)
				{
					condition->traverse(this);
				}

				emit(sw::Shader::OPCODE_ENDWHILE);
			}
		}

		if(loop.isDeterministic())
		{
			 deterministicVariables.erase(loop.index->getId());
		}

		return false;
	}
1960 
visitBranch(Visit visit,TIntermBranch * node)1961 	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
1962 	{
1963 		if(currentScope != emitScope)
1964 		{
1965 			return false;
1966 		}
1967 
1968 		switch(node->getFlowOp())
1969 		{
1970 		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
1971 		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
1972 		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
1973 		case EOpReturn:
1974 			if(visit == PostVisit)
1975 			{
1976 				TIntermTyped *value = node->getExpression();
1977 
1978 				if(value)
1979 				{
1980 					copy(functionArray[currentFunction].ret, value);
1981 				}
1982 
1983 				emit(sw::Shader::OPCODE_LEAVE);
1984 			}
1985 			break;
1986 		default: UNREACHABLE(node->getFlowOp());
1987 		}
1988 
1989 		return true;
1990 	}
1991 
	// Emits shader instructions for a switch statement.
	//
	// The statement list is lowered to a chain of nested IF / ELSE blocks placed between
	// OPCODE_SWITCH and OPCODE_ENDSWITCH. Each non-default case label compares the switch
	// value with the label's condition (OPCODE_EQ) and, inside the resulting IF, emits the
	// statements that follow the label. The default label, if present, is emitted last,
	// whatever its position in the statement list.
	// Returns false in every case; the children are traversed explicitly here.
	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
	{
		if(currentScope != emitScope)
		{
			return false;
		}

		TIntermTyped* switchValue = node->getInit();
		TIntermAggregate* opList = node->getStatementList();

		// Nothing to emit without a value to switch on or a statement list.
		if(!switchValue || !opList)
		{
			return false;
		}

		switchValue->traverse(this);

		emit(sw::Shader::OPCODE_SWITCH);

		TIntermSequence& sequence = opList->getSequence();
		TIntermSequence::iterator it = sequence.begin();
		TIntermSequence::iterator defaultIt = sequence.end();   // end() means "no default label seen"
		int nbCases = 0;   // Number of IF instructions emitted so far
		for(; it != sequence.end(); ++it)
		{
			TIntermCase* currentCase = (*it)->getAsCaseNode();
			if(currentCase)
			{
				TIntermSequence::iterator caseIt = it;

				TIntermTyped* condition = currentCase->getCondition();
				if(condition) // non default case
				{
					// Every case after the first lives in the ELSE branch of the previous one.
					if(nbCases != 0)
					{
						emit(sw::Shader::OPCODE_ELSE);
					}

					condition->traverse(this);
					Temporary result(this);
					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
					emit(sw::Shader::OPCODE_IF, 0, &result);
					nbCases++;

					// Emit the code for this case and all subsequent cases until we hit a break statement.
					// TODO: This can repeat a lot of code for switches with many fall-through cases.
					for(++caseIt; caseIt != sequence.end(); ++caseIt)
					{
						(*caseIt)->traverse(this);

						// Stop if we encounter an unconditional branch (break, continue, return, or kill).
						// TODO: This doesn't work if the statement is at a deeper scope level (e.g. {break;}).
						// Note that this eliminates useless operations but shouldn't affect correctness.
						if((*caseIt)->getAsBranchNode())
						{
							break;
						}
					}
				}
				else
				{
					defaultIt = it; // The default case might not be the last case, keep it for last
				}
			}
		}

		// If there's a default case, traverse it here
		if(defaultIt != sequence.end())
		{
			// With at least one case emitted, the default body becomes the innermost ELSE branch.
			if(nbCases != 0)
			{
				emit(sw::Shader::OPCODE_ELSE);
			}

			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
			{
				(*defaultIt)->traverse(this);
				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
				{
					break;
				}
			}
		}

		// Close every IF that was opened, one ENDIF per emitted case.
		for(int i = 0; i < nbCases; ++i)
		{
			emit(sw::Shader::OPCODE_ENDIF);
		}

		emit(sw::Shader::OPCODE_ENDSWITCH);

		return false;
	}
2085 
emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)2086 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
2087 	{
2088 		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
2089 	}
2090 
emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)2091 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
2092 	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
2093 	{
2094 		Instruction *instruction = new Instruction(op);
2095 
2096 		if(dst)
2097 		{
2098 			destination(instruction->dst, dst, dstIndex);
2099 		}
2100 
2101 		if(src0)
2102 		{
2103 			TIntermTyped* src = src0->getAsTyped();
2104 			instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow);
2105 		}
2106 
2107 		source(instruction->src[0], src0, index0);
2108 		source(instruction->src[1], src1, index1);
2109 		source(instruction->src[2], src2, index2);
2110 		source(instruction->src[3], src3, index3);
2111 		source(instruction->src[4], src4, index4);
2112 
2113 		shader->append(instruction);
2114 
2115 		return instruction;
2116 	}
2117 
emitCast(TIntermTyped * dst,TIntermTyped * src)2118 	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
2119 	{
2120 		return emitCast(dst, 0, src, 0);
2121 	}
2122 
emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)2123 	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
2124 	{
2125 		switch(src->getBasicType())
2126 		{
2127 		case EbtBool:
2128 			switch(dst->getBasicType())
2129 			{
2130 			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
2131 			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
2132 			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
2133 			default:       break;
2134 			}
2135 			break;
2136 		case EbtInt:
2137 			switch(dst->getBasicType())
2138 			{
2139 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
2140 			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
2141 			default:       break;
2142 			}
2143 			break;
2144 		case EbtUInt:
2145 			switch(dst->getBasicType())
2146 			{
2147 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
2148 			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
2149 			default:       break;
2150 			}
2151 			break;
2152 		case EbtFloat:
2153 			switch(dst->getBasicType())
2154 			{
2155 			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
2156 			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
2157 			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
2158 			default:      break;
2159 			}
2160 			break;
2161 		default:
2162 			break;
2163 		}
2164 
2165 		ASSERT((src->getBasicType() == dst->getBasicType()) ||
2166 		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
2167 		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
2168 
2169 		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
2170 	}
2171 
emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)2172 	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
2173 	{
2174 		for(int index = 0; index < dst->elementRegisterCount(); index++)
2175 		{
2176 			emit(op, dst, index, src0, index, src1, index, src2, index);
2177 		}
2178 	}
2179 
emitAssign(sw::Shader::Opcode op,TIntermTyped * result,TIntermTyped * lhs,TIntermTyped * src0,TIntermTyped * src1)2180 	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
2181 	{
2182 		emitBinary(op, result, src0, src1);
2183 		assignLvalue(lhs, result);
2184 	}
2185 
emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)2186 	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
2187 	{
2188 		sw::Shader::Opcode opcode;
2189 		switch(left->getAsTyped()->getBasicType())
2190 		{
2191 		case EbtBool:
2192 		case EbtInt:
2193 			opcode = sw::Shader::OPCODE_ICMP;
2194 			break;
2195 		case EbtUInt:
2196 			opcode = sw::Shader::OPCODE_UCMP;
2197 			break;
2198 		default:
2199 			opcode = sw::Shader::OPCODE_CMP;
2200 			break;
2201 		}
2202 
2203 		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
2204 		cmp->control = cmpOp;
2205 	}
2206 
componentCount(const TType & type,int registers)2207 	int componentCount(const TType &type, int registers)
2208 	{
2209 		if(registers == 0)
2210 		{
2211 			return 0;
2212 		}
2213 
2214 		if(type.isArray() && registers >= type.elementRegisterCount())
2215 		{
2216 			int index = registers / type.elementRegisterCount();
2217 			registers -= index * type.elementRegisterCount();
2218 			return index * type.getElementSize() + componentCount(type, registers);
2219 		}
2220 
2221 		if(type.isStruct() || type.isInterfaceBlock())
2222 		{
2223 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2224 			int elements = 0;
2225 
2226 			for(const auto &field : fields)
2227 			{
2228 				const TType &fieldType = *(field->type());
2229 
2230 				if(fieldType.totalRegisterCount() <= registers)
2231 				{
2232 					registers -= fieldType.totalRegisterCount();
2233 					elements += fieldType.getObjectSize();
2234 				}
2235 				else   // Register within this field
2236 				{
2237 					return elements + componentCount(fieldType, registers);
2238 				}
2239 			}
2240 		}
2241 		else if(type.isMatrix())
2242 		{
2243 			return registers * type.registerSize();
2244 		}
2245 
2246 		UNREACHABLE(0);
2247 		return 0;
2248 	}
2249 
registerSize(const TType & type,int registers)2250 	int registerSize(const TType &type, int registers)
2251 	{
2252 		if(registers == 0)
2253 		{
2254 			if(type.isStruct())
2255 			{
2256 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
2257 			}
2258 			else if(type.isInterfaceBlock())
2259 			{
2260 				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
2261 			}
2262 
2263 			return type.registerSize();
2264 		}
2265 
2266 		if(type.isArray() && registers >= type.elementRegisterCount())
2267 		{
2268 			int index = registers / type.elementRegisterCount();
2269 			registers -= index * type.elementRegisterCount();
2270 			return registerSize(type, registers);
2271 		}
2272 
2273 		if(type.isStruct() || type.isInterfaceBlock())
2274 		{
2275 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2276 			int elements = 0;
2277 
2278 			for(const auto &field : fields)
2279 			{
2280 				const TType &fieldType = *(field->type());
2281 
2282 				if(fieldType.totalRegisterCount() <= registers)
2283 				{
2284 					registers -= fieldType.totalRegisterCount();
2285 					elements += fieldType.getObjectSize();
2286 				}
2287 				else   // Register within this field
2288 				{
2289 					return registerSize(fieldType, registers);
2290 				}
2291 			}
2292 		}
2293 		else if(type.isMatrix())
2294 		{
2295 			return registerSize(type, 0);
2296 		}
2297 
2298 		UNREACHABLE(0);
2299 		return 0;
2300 	}
2301 
getBlockId(TIntermTyped * arg)2302 	int OutputASM::getBlockId(TIntermTyped *arg)
2303 	{
2304 		if(arg)
2305 		{
2306 			const TType &type = arg->getType();
2307 			TInterfaceBlock* block = type.getInterfaceBlock();
2308 			if(block && (type.getQualifier() == EvqUniform))
2309 			{
2310 				// Make sure the uniform block is declared
2311 				uniformRegister(arg);
2312 
2313 				const char* blockName = block->name().c_str();
2314 
2315 				// Fetch uniform block index from array of blocks
2316 				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
2317 				{
2318 					if(blockName == it->name)
2319 					{
2320 						return it->blockId;
2321 					}
2322 				}
2323 
2324 				ASSERT(false);
2325 			}
2326 		}
2327 
2328 		return -1;
2329 	}
2330 
	// Builds the ArgumentInfo used to address register 'index' of 'arg'.
	//
	// When getBlockId() finds a uniform block for 'arg', the result carries the buffer index
	// of that block (instance), the block member definition covering the register, and the
	// register index relative to that member, clamped to the member's register count.
	// Otherwise the default block info and the type of 'arg' are used, and only the index is
	// clamped, to the last register of 'arg'.
	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
	{
		const TType &type = arg->getType();
		int blockId = getBlockId(arg);
		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
		if(blockId != -1)
		{
			// The buffer index of a block is the total number of buffers used by the blocks
			// before it: an arrayed block counts once per array element, any other block once.
			argumentInfo.bufferIndex = 0;
			for(int i = 0; i < blockId; ++i)
			{
				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
			}

			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];

			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
			BlockDefinitionIndexMap::const_iterator it = itEnd;

			argumentInfo.clampedIndex = index;
			if(type.isInterfaceBlock())
			{
				// Offset index to the beginning of the selected instance
				int blockRegisters = type.elementRegisterCount();
				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
				argumentInfo.bufferIndex += bufferOffset;
				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
			}

			// Walk backwards from the addressed register to the closest register that has a
			// block member definition, and make the index relative to that member.
			int regIndex = registerIndex(arg);
			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
			{
				it = blockDefinition.find(i);
				if(it != itEnd)
				{
					argumentInfo.clampedIndex -= (i - regIndex);
					break;
				}
			}
			ASSERT(it != itEnd);

			argumentInfo.typedMemberInfo = it->second;

			// Clamp to the last register of the member.
			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
		}
		else
		{
			// Not a uniform block member: clamp to the last register of the argument itself.
			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
		}

		return argumentInfo;
	}
2384 
	// Fills in 'parameter' so that it refers to register 'index' of 'argument'.
	// Does nothing when 'argument' is null.
	//
	// Uniform block members that are booleans or row-major matrices are first unpacked into a
	// temporary by instructions appended to the shader here; the parameter then refers to that
	// temporary. Constant unions with a value array are written into parameter.value; all
	// other arguments are referenced by register index. A swizzle is set for every
	// non-sampler argument.
	void OutputASM::source(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
	{
		if(argument)
		{
			TIntermTyped *arg = argument->getAsTyped();
			Temporary unpackedUniform(this);   // Receives the unpacked value in the two special cases below

			const TType& srcType = arg->getType();
			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
			if(srcBlock && (srcType.getQualifier() == EvqUniform))
			{
				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
				const TType &memberType = argumentInfo.typedMemberInfo.type;

				if(memberType.getBasicType() == EbtBool)
				{
					ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize

					// Convert the packed bool, which is currently an int, to a true bool
					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
					instruction->dst.index = registerIndex(&unpackedUniform);
					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;

					shader->append(instruction);

					// From here on, read from the temporary instead of the uniform block.
					arg = &unpackedUniform;
					index = 0;
				}
				else if((memberType.getLayoutQualifier().matrixPacking == EmpRowMajor) && memberType.isMatrix())
				{
					int numCols = memberType.getNominalSize();
					int numRows = memberType.getSecondarySize();

					ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize

					// Gather the requested column: component (index % numCols) of every stored
					// row is written to component j of the temporary.
					unsigned int dstIndex = registerIndex(&unpackedUniform);
					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;
					int arrayIndex = argumentInfo.clampedIndex / numCols;
					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;

					for(int j = 0; j < numRows; ++j)
					{
						// Transpose the row major matrix
						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
						instruction->dst.index = dstIndex;
						instruction->dst.mask = 1 << j;
						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
						instruction->src[0].swizzle = srcSwizzle;

						shader->append(instruction);
					}

					// From here on, read from the temporary instead of the uniform block.
					arg = &unpackedUniform;
					index = 0;
				}
			}

			// Looked up again on purpose: 'arg' and 'index' may now designate the temporary.
			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
			const TType &type = argumentInfo.typedMemberInfo.type;

			int size = registerSize(type, argumentInfo.clampedIndex);

			parameter.type = registerType(arg);
			parameter.bufferIndex = argumentInfo.bufferIndex;

			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
			{
				// Copy the constant's components for this register into the parameter's value array.
				int component = componentCount(type, argumentInfo.clampedIndex);
				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();

				for(int i = 0; i < 4; i++)
				{
					if(size == 1)   // Replicate
					{
						parameter.value[i] = constants[component + 0].getAsFloat();
					}
					else if(i < size)
					{
						parameter.value[i] = constants[component + i].getAsFloat();
					}
					else
					{
						// Components beyond the register's size are zero-filled.
						parameter.value[i] = 0.0f;
					}
				}
			}
			else
			{
				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;

				// A buffer index other than -1 presumably means getArgumentInfo() resolved a
				// uniform block member; it is then addressed by its offset and stride instead.
				if(parameter.bufferIndex != -1)
				{
					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
				}

				if(parameter.index >= sw::NUM_TEMPORARY_REGISTERS)
				{
					mContext.error(arg->getLine(),
						"Too many temporary registers required to compile shader",
						pixelShader ? "pixel shader" : "vertex shader");
				}
			}

			if(!IsSampler(arg->getBasicType()))
			{
				parameter.swizzle = readSwizzle(arg, size);
			}
		}
	}
2501 
destination(sw::Shader::DestinationParameter & parameter,TIntermTyped * arg,int index)2502 	void OutputASM::destination(sw::Shader::DestinationParameter &parameter, TIntermTyped *arg, int index)
2503 	{
2504 		parameter.type = registerType(arg);
2505 		parameter.index = registerIndex(arg) + index;
2506 		parameter.mask = writeMask(arg, index);
2507 
2508 		if(parameter.index >= sw::NUM_TEMPORARY_REGISTERS)
2509 		{
2510 			mContext.error(arg->getLine(),
2511 				"Too many temporary registers required to compile shader",
2512 				pixelShader ? "pixel shader" : "vertex shader");
2513 		}
2514 
2515 	}
2516 
copy(TIntermTyped * dst,TIntermNode * src,int offset)2517 	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
2518 	{
2519 		for(int index = 0; index < dst->totalRegisterCount(); index++)
2520 		{
2521 			emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
2522 		}
2523 	}
2524 
swizzleElement(int swizzle,int index)2525 	int swizzleElement(int swizzle, int index)
2526 	{
2527 		return (swizzle >> (index * 2)) & 0x03;
2528 	}
2529 
swizzleSwizzle(int leftSwizzle,int rightSwizzle)2530 	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
2531 	{
2532 		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
2533 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
2534 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
2535 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
2536 	}
2537 
	// Emits the instructions that store 'src' into the l-value expression 'dst'.
	//
	// Reports a compile error and emits nothing when 'src' is a vector or matrix whose shape
	// does not match 'dst'. A scalar selected from a vector with a non-constant index is
	// written with an INSERT instruction; every other l-value is written with one MOV per
	// register of 'dst'.
	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
	{
		if((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
		   (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize()))))
		{
			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
		}

		TIntermBinary *binary = dst->getAsBinaryNode();

		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
		{
			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);

			lvalue(insert->dst, dst);

			// INSERT operands: the register being written (src[0], same as the destination),
			// the value to store (src[1]) and the index expression (src[2]).
			insert->src[0].type = insert->dst.type;
			insert->src[0].index = insert->dst.index;
			insert->src[0].rel = insert->dst.rel;
			source(insert->src[1], src);
			source(insert->src[2], binary->getRight());

			shader->append(insert);
		}
		else
		{
			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);

			int swizzle = lvalue(mov1->dst, dst);

			// Combine the source's swizzle with the swizzle returned for the l-value.
			source(mov1->src[0], src);
			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);

			shader->append(mov1);

			// Remaining registers reuse the first MOV's destination, moved forward by
			// 'offset' registers and with the write mask for that register.
			for(int offset = 1; offset < dst->totalRegisterCount(); offset++)
			{
				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);

				mov->dst = mov1->dst;
				mov->dst.index += offset;
				mov->dst.mask = writeMask(dst, offset);

				source(mov->src[0], src, offset);

				shader->append(mov);
			}
		}
	}
2587 
evaluateRvalue(TIntermTyped * node)2588 	void OutputASM::evaluateRvalue(TIntermTyped *node)
2589 	{
2590 		TIntermBinary *binary = node->getAsBinaryNode();
2591 
2592 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && node->isScalar())
2593 		{
2594 			Instruction *insert = new Instruction(sw::Shader::OPCODE_EXTRACT);
2595 
2596 			destination(insert->dst, node);
2597 
2598 			Temporary address(this);
2599 			unsigned char mask;
2600 			TIntermTyped *root = nullptr;
2601 			unsigned int offset = 0;
2602 			int swizzle = lvalue(root, offset, insert->src[0].rel, mask, address, node);
2603 
2604 			source(insert->src[0], root, offset);
2605 			insert->src[0].swizzle = swizzleSwizzle(insert->src[0].swizzle, swizzle);
2606 
2607 			source(insert->src[1], binary->getRight());
2608 
2609 			shader->append(insert);
2610 		}
2611 		else
2612 		{
2613 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
2614 
2615 			destination(mov1->dst, node, 0);
2616 
2617 			Temporary address(this);
2618 			unsigned char mask;
2619 			TIntermTyped *root = nullptr;
2620 			unsigned int offset = 0;
2621 			int swizzle = lvalue(root, offset, mov1->src[0].rel, mask, address, node);
2622 
2623 			source(mov1->src[0], root, offset);
2624 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
2625 
2626 			shader->append(mov1);
2627 
2628 			for(int i = 1; i < node->totalRegisterCount(); i++)
2629 			{
2630 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, node, i, root, offset + i);
2631 				mov->src[0].rel = mov1->src[0].rel;
2632 			}
2633 		}
2634 	}
2635 
lvalue(sw::Shader::DestinationParameter & dst,TIntermTyped * node)2636 	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, TIntermTyped *node)
2637 	{
2638 		Temporary address(this);
2639 		TIntermTyped *root = nullptr;
2640 		unsigned int offset = 0;
2641 		unsigned char mask = 0xF;
2642 		int swizzle = lvalue(root, offset, dst.rel, mask, address, node);
2643 
2644 		dst.type = registerType(root);
2645 		dst.index = registerIndex(root) + offset;
2646 		dst.mask = mask;
2647 
2648 		return swizzle;
2649 	}
2650 
lvalue(TIntermTyped * & root,unsigned int & offset,sw::Shader::Relative & rel,unsigned char & mask,Temporary & address,TIntermTyped * node)2651 	int OutputASM::lvalue(TIntermTyped *&root, unsigned int &offset, sw::Shader::Relative &rel, unsigned char &mask, Temporary &address, TIntermTyped *node)
2652 	{
2653 		TIntermTyped *result = node;
2654 		TIntermBinary *binary = node->getAsBinaryNode();
2655 		TIntermSymbol *symbol = node->getAsSymbolNode();
2656 
2657 		if(binary)
2658 		{
2659 			TIntermTyped *left = binary->getLeft();
2660 			TIntermTyped *right = binary->getRight();
2661 
2662 			int leftSwizzle = lvalue(root, offset, rel, mask, address, left);   // Resolve the l-value of the left side
2663 
2664 			switch(binary->getOp())
2665 			{
2666 			case EOpIndexDirect:
2667 				{
2668 					int rightIndex = right->getAsConstantUnion()->getIConst(0);
2669 
2670 					if(left->isRegister())
2671 					{
2672 						int leftMask = mask;
2673 
2674 						mask = 1;
2675 						while((leftMask & mask) == 0)
2676 						{
2677 							mask = mask << 1;
2678 						}
2679 
2680 						int element = swizzleElement(leftSwizzle, rightIndex);
2681 						mask = 1 << element;
2682 
2683 						return element;
2684 					}
2685 					else if(left->isArray() || left->isMatrix())
2686 					{
2687 						offset += rightIndex * result->totalRegisterCount();
2688 						return 0xE4;
2689 					}
2690 					else UNREACHABLE(0);
2691 				}
2692 				break;
2693 			case EOpIndexIndirect:
2694 				{
2695 					right->traverse(this);
2696 
2697 					if(left->isRegister())
2698 					{
2699 						// Requires INSERT instruction (handled by calling function)
2700 					}
2701 					else if(left->isArray() || left->isMatrix())
2702 					{
2703 						int scale = result->totalRegisterCount();
2704 
2705 						if(rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
2706 						{
2707 							if(left->totalRegisterCount() > 1)
2708 							{
2709 								sw::Shader::SourceParameter relativeRegister;
2710 								source(relativeRegister, right);
2711 
2712 								int indexId = right->getAsSymbolNode() ? right->getAsSymbolNode()->getId() : 0;
2713 
2714 								rel.index = relativeRegister.index;
2715 								rel.type = relativeRegister.type;
2716 								rel.scale = scale;
2717 								rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0);
2718 							}
2719 						}
2720 						else if(rel.index != registerIndex(&address))   // Move the previous index register to the address register
2721 						{
2722 							if(scale == 1)
2723 							{
2724 								Constant oldScale((int)rel.scale);
2725 								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
2726 								mad->src[0].index = rel.index;
2727 								mad->src[0].type = rel.type;
2728 							}
2729 							else
2730 							{
2731 								Constant oldScale((int)rel.scale);
2732 								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
2733 								mul->src[0].index = rel.index;
2734 								mul->src[0].type = rel.type;
2735 
2736 								Constant newScale(scale);
2737 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2738 							}
2739 
2740 							rel.type = sw::Shader::PARAMETER_TEMP;
2741 							rel.index = registerIndex(&address);
2742 							rel.scale = 1;
2743 						}
2744 						else   // Just add the new index to the address register
2745 						{
2746 							if(scale == 1)
2747 							{
2748 								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
2749 							}
2750 							else
2751 							{
2752 								Constant newScale(scale);
2753 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2754 							}
2755 						}
2756 					}
2757 					else UNREACHABLE(0);
2758 				}
2759 				break;
2760 			case EOpIndexDirectStruct:
2761 			case EOpIndexDirectInterfaceBlock:
2762 				{
2763 					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
2764 					                           left->getType().getStruct()->fields() :
2765 					                           left->getType().getInterfaceBlock()->fields();
2766 					int index = right->getAsConstantUnion()->getIConst(0);
2767 					int fieldOffset = 0;
2768 
2769 					for(int i = 0; i < index; i++)
2770 					{
2771 						fieldOffset += fields[i]->type()->totalRegisterCount();
2772 					}
2773 
2774 					offset += fieldOffset;
2775 					mask = writeMask(result);
2776 
2777 					return 0xE4;
2778 				}
2779 				break;
2780 			case EOpVectorSwizzle:
2781 				{
2782 					ASSERT(left->isRegister());
2783 
2784 					int leftMask = mask;
2785 
2786 					int swizzle = 0;
2787 					int rightMask = 0;
2788 
2789 					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
2790 
2791 					for(unsigned int i = 0; i < sequence.size(); i++)
2792 					{
2793 						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
2794 
2795 						int element = swizzleElement(leftSwizzle, index);
2796 						rightMask = rightMask | (1 << element);
2797 						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
2798 					}
2799 
2800 					mask = leftMask & rightMask;
2801 
2802 					return swizzle;
2803 				}
2804 				break;
2805 			default:
2806 				UNREACHABLE(binary->getOp());   // Not an l-value operator
2807 				break;
2808 			}
2809 		}
2810 		else if(symbol)
2811 		{
2812 			root = symbol;
2813 			offset = 0;
2814 			mask = writeMask(symbol);
2815 
2816 			return 0xE4;
2817 		}
2818 		else
2819 		{
2820 			node->traverse(this);
2821 
2822 			root = node;
2823 			offset = 0;
2824 			mask = writeMask(node);
2825 
2826 			return 0xE4;
2827 		}
2828 
2829 		return 0xE4;
2830 	}
2831 
registerType(TIntermTyped * operand)2832 	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
2833 	{
2834 		if(isSamplerRegister(operand))
2835 		{
2836 			return sw::Shader::PARAMETER_SAMPLER;
2837 		}
2838 
2839 		const TQualifier qualifier = operand->getQualifier();
2840 		if((qualifier == EvqFragColor) || (qualifier == EvqFragData))
2841 		{
2842 			if(((qualifier == EvqFragData) && (outputQualifier == EvqFragColor)) ||
2843 			   ((qualifier == EvqFragColor) && (outputQualifier == EvqFragData)))
2844 			{
2845 				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
2846 			}
2847 			outputQualifier = qualifier;
2848 		}
2849 
2850 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
2851 		{
2852 			// Constant arrays are in the constant register file.
2853 			if(operand->isArray() && operand->getArraySize() > 1)
2854 			{
2855 				return sw::Shader::PARAMETER_CONST;
2856 			}
2857 			else
2858 			{
2859 				return sw::Shader::PARAMETER_TEMP;
2860 			}
2861 		}
2862 
2863 		switch(qualifier)
2864 		{
2865 		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
2866 		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
2867 		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
2868 		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
2869 		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
2870 		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
2871 		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
2872 		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
2873 		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
2874 		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
2875 		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
2876 		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
2877 		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
2878 		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
2879 		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
2880 		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
2881 		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
2882 		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
2883 		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
2884 		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
2885 		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
2886 		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
2887 		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
2888 		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
2889 		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
2890 		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
2891 		case EvqVertexID:            return sw::Shader::PARAMETER_MISCTYPE;
2892 		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
2893 		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
2894 		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
2895 		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
2896 		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
2897 		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
2898 		default: UNREACHABLE(qualifier);
2899 		}
2900 
2901 		return sw::Shader::PARAMETER_VOID;
2902 	}
2903 
hasFlatQualifier(TIntermTyped * operand)2904 	bool OutputASM::hasFlatQualifier(TIntermTyped *operand)
2905 	{
2906 		const TQualifier qualifier = operand->getQualifier();
2907 		return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn;
2908 	}
2909 
registerIndex(TIntermTyped * operand)2910 	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
2911 	{
2912 		if(isSamplerRegister(operand))
2913 		{
2914 			return samplerRegister(operand);
2915 		}
2916 		else if(operand->getType().totalSamplerRegisterCount() > 0) // Struct containing a sampler
2917 		{
2918 			samplerRegister(operand); // Make sure the sampler is declared
2919 		}
2920 
2921 		const TQualifier qualifier = operand->getQualifier();
2922 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
2923 		{
2924 			// Constant arrays are in the constant register file.
2925 			if(operand->isArray() && operand->getArraySize() > 1)
2926 			{
2927 				return uniformRegister(operand);
2928 			}
2929 			else
2930 			{
2931 				return temporaryRegister(operand);
2932 			}
2933 		}
2934 
2935 		switch(operand->getQualifier())
2936 		{
2937 		case EvqTemporary:           return temporaryRegister(operand);
2938 		case EvqGlobal:              return temporaryRegister(operand);
2939 		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
2940 		case EvqAttribute:           return attributeRegister(operand);
2941 		case EvqVaryingIn:           return varyingRegister(operand);
2942 		case EvqVaryingOut:          return varyingRegister(operand);
2943 		case EvqVertexIn:            return attributeRegister(operand);
2944 		case EvqFragmentOut:         return fragmentOutputRegister(operand);
2945 		case EvqVertexOut:           return varyingRegister(operand);
2946 		case EvqFragmentIn:          return varyingRegister(operand);
2947 		case EvqInvariantVaryingIn:  return varyingRegister(operand);
2948 		case EvqInvariantVaryingOut: return varyingRegister(operand);
2949 		case EvqSmooth:              return varyingRegister(operand);
2950 		case EvqFlat:                return varyingRegister(operand);
2951 		case EvqCentroidOut:         return varyingRegister(operand);
2952 		case EvqSmoothIn:            return varyingRegister(operand);
2953 		case EvqFlatIn:              return varyingRegister(operand);
2954 		case EvqCentroidIn:          return varyingRegister(operand);
2955 		case EvqUniform:             return uniformRegister(operand);
2956 		case EvqIn:                  return temporaryRegister(operand);
2957 		case EvqOut:                 return temporaryRegister(operand);
2958 		case EvqInOut:               return temporaryRegister(operand);
2959 		case EvqConstReadOnly:       return temporaryRegister(operand);
2960 		case EvqPosition:            return varyingRegister(operand);
2961 		case EvqPointSize:           return varyingRegister(operand);
2962 		case EvqInstanceID:          vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
2963 		case EvqVertexID:            vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
2964 		case EvqFragCoord:           pixelShader->declareVPos();  return sw::Shader::VPosIndex;
2965 		case EvqFrontFacing:         pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
2966 		case EvqPointCoord:          return varyingRegister(operand);
2967 		case EvqFragColor:           return 0;
2968 		case EvqFragData:            return fragmentOutputRegister(operand);
2969 		case EvqFragDepth:           return 0;
2970 		default: UNREACHABLE(operand->getQualifier());
2971 		}
2972 
2973 		return 0;
2974 	}
2975 
writeMask(TIntermTyped * destination,int index)2976 	int OutputASM::writeMask(TIntermTyped *destination, int index)
2977 	{
2978 		if(destination->getQualifier() == EvqPointSize)
2979 		{
2980 			return 0x2;   // Point size stored in the y component
2981 		}
2982 
2983 		return 0xF >> (4 - registerSize(destination->getType(), index));
2984 	}
2985 
readSwizzle(TIntermTyped * argument,int size)2986 	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
2987 	{
2988 		if(argument->getQualifier() == EvqPointSize)
2989 		{
2990 			return 0x55;   // Point size stored in the y component
2991 		}
2992 
2993 		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
2994 
2995 		return swizzleSize[size];
2996 	}
2997 
2998 	// Conservatively checks whether an expression is fast to compute and has no side effects
trivial(TIntermTyped * expression,int budget)2999 	bool OutputASM::trivial(TIntermTyped *expression, int budget)
3000 	{
3001 		if(!expression->isRegister())
3002 		{
3003 			return false;
3004 		}
3005 
3006 		return cost(expression, budget) >= 0;
3007 	}
3008 
3009 	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
cost(TIntermNode * expression,int budget)3010 	int OutputASM::cost(TIntermNode *expression, int budget)
3011 	{
3012 		if(budget < 0)
3013 		{
3014 			return budget;
3015 		}
3016 
3017 		if(expression->getAsSymbolNode())
3018 		{
3019 			return budget;
3020 		}
3021 		else if(expression->getAsConstantUnion())
3022 		{
3023 			return budget;
3024 		}
3025 		else if(expression->getAsBinaryNode())
3026 		{
3027 			TIntermBinary *binary = expression->getAsBinaryNode();
3028 
3029 			switch(binary->getOp())
3030 			{
3031 			case EOpVectorSwizzle:
3032 			case EOpIndexDirect:
3033 			case EOpIndexDirectStruct:
3034 			case EOpIndexDirectInterfaceBlock:
3035 				return cost(binary->getLeft(), budget - 0);
3036 			case EOpAdd:
3037 			case EOpSub:
3038 			case EOpMul:
3039 				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
3040 			default:
3041 				return -1;
3042 			}
3043 		}
3044 		else if(expression->getAsUnaryNode())
3045 		{
3046 			TIntermUnary *unary = expression->getAsUnaryNode();
3047 
3048 			switch(unary->getOp())
3049 			{
3050 			case EOpAbs:
3051 			case EOpNegative:
3052 				return cost(unary->getOperand(), budget - 1);
3053 			default:
3054 				return -1;
3055 			}
3056 		}
3057 		else if(expression->getAsSelectionNode())
3058 		{
3059 			TIntermSelection *selection = expression->getAsSelectionNode();
3060 
3061 			if(selection->usesTernaryOperator())
3062 			{
3063 				TIntermTyped *condition = selection->getCondition();
3064 				TIntermNode *trueBlock = selection->getTrueBlock();
3065 				TIntermNode *falseBlock = selection->getFalseBlock();
3066 				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
3067 
3068 				if(constantCondition)
3069 				{
3070 					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
3071 
3072 					if(trueCondition)
3073 					{
3074 						return cost(trueBlock, budget - 0);
3075 					}
3076 					else
3077 					{
3078 						return cost(falseBlock, budget - 0);
3079 					}
3080 				}
3081 				else
3082 				{
3083 					return cost(trueBlock, cost(falseBlock, budget - 2));
3084 				}
3085 			}
3086 		}
3087 
3088 		return -1;
3089 	}
3090 
findFunction(const TString & name)3091 	const Function *OutputASM::findFunction(const TString &name)
3092 	{
3093 		for(unsigned int f = 0; f < functionArray.size(); f++)
3094 		{
3095 			if(functionArray[f].name == name)
3096 			{
3097 				return &functionArray[f];
3098 			}
3099 		}
3100 
3101 		return 0;
3102 	}
3103 
temporaryRegister(TIntermTyped * temporary)3104 	int OutputASM::temporaryRegister(TIntermTyped *temporary)
3105 	{
3106 		int index = allocate(temporaries, temporary);
3107 		if(index >= sw::NUM_TEMPORARY_REGISTERS)
3108 		{
3109 			mContext.error(temporary->getLine(),
3110 				"Too many temporary registers required to compile shader",
3111 				pixelShader ? "pixel shader" : "vertex shader");
3112 		}
3113 		return index;
3114 	}
3115 
setPixelShaderInputs(const TType & type,int var,bool flat)3116 	void OutputASM::setPixelShaderInputs(const TType& type, int var, bool flat)
3117 	{
3118 		if(type.isStruct())
3119 		{
3120 			const TFieldList &fields = type.getStruct()->fields();
3121 			int fieldVar = var;
3122 			for(const auto &field : fields)
3123 			{
3124 				const TType& fieldType = *(field->type());
3125 				setPixelShaderInputs(fieldType, fieldVar, flat);
3126 				fieldVar += fieldType.totalRegisterCount();
3127 			}
3128 		}
3129 		else
3130 		{
3131 			for(int i = 0; i < type.totalRegisterCount(); i++)
3132 			{
3133 				pixelShader->setInput(var + i, type.registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat));
3134 			}
3135 		}
3136 	}
3137 
varyingRegister(TIntermTyped * varying)3138 	int OutputASM::varyingRegister(TIntermTyped *varying)
3139 	{
3140 		int var = lookup(varyings, varying);
3141 
3142 		if(var == -1)
3143 		{
3144 			var = allocate(varyings, varying);
3145 			if (var == -1)
3146 			{
3147 				return 0;
3148 			}
3149 			int registerCount = varying->totalRegisterCount();
3150 
3151 			if(pixelShader)
3152 			{
3153 				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
3154 				{
3155 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
3156 					return 0;
3157 				}
3158 
3159 				if(varying->getQualifier() == EvqPointCoord)
3160 				{
3161 					ASSERT(varying->isRegister());
3162 					pixelShader->setInput(var, varying->registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var));
3163 				}
3164 				else
3165 				{
3166 					setPixelShaderInputs(varying->getType(), var, hasFlatQualifier(varying));
3167 				}
3168 			}
3169 			else if(vertexShader)
3170 			{
3171 				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
3172 				{
3173 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
3174 					return 0;
3175 				}
3176 
3177 				if(varying->getQualifier() == EvqPosition)
3178 				{
3179 					ASSERT(varying->isRegister());
3180 					vertexShader->setPositionRegister(var);
3181 				}
3182 				else if(varying->getQualifier() == EvqPointSize)
3183 				{
3184 					ASSERT(varying->isRegister());
3185 					vertexShader->setPointSizeRegister(var);
3186 				}
3187 				else
3188 				{
3189 					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
3190 				}
3191 			}
3192 			else UNREACHABLE(0);
3193 
3194 			declareVarying(varying, var);
3195 		}
3196 
3197 		return var;
3198 	}
3199 
declareVarying(TIntermTyped * varying,int reg)3200 	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
3201 	{
3202 		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
3203 		{
3204 			TIntermSymbol *symbol = varying->getAsSymbolNode();
3205 			declareVarying(varying->getType(), symbol->getSymbol(), reg);
3206 		}
3207 	}
3208 
declareVarying(const TType & type,const TString & varyingName,int registerIndex)3209 	void OutputASM::declareVarying(const TType &type, const TString &varyingName, int registerIndex)
3210 	{
3211 		const char *name = varyingName.c_str();
3212 		VaryingList &activeVaryings = shaderObject->varyings;
3213 
3214 		TStructure* structure = type.getStruct();
3215 		if(structure)
3216 		{
3217 			int fieldRegisterIndex = registerIndex;
3218 
3219 			const TFieldList &fields = type.getStruct()->fields();
3220 			for(const auto &field : fields)
3221 			{
3222 				const TType& fieldType = *(field->type());
3223 				declareVarying(fieldType, varyingName + "." + field->name(), fieldRegisterIndex);
3224 				if(fieldRegisterIndex >= 0)
3225 				{
3226 					fieldRegisterIndex += fieldType.totalRegisterCount();
3227 				}
3228 			}
3229 		}
3230 		else
3231 		{
3232 			// Check if this varying has been declared before without having a register assigned
3233 			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
3234 			{
3235 				if(v->name == name)
3236 				{
3237 					if(registerIndex >= 0)
3238 					{
3239 						ASSERT(v->registerIndex < 0 || v->registerIndex == registerIndex);
3240 						v->registerIndex = registerIndex;
3241 					}
3242 
3243 					return;
3244 				}
3245 			}
3246 
3247 			activeVaryings.push_back(glsl::Varying(type, name, registerIndex, 0));
3248 		}
3249 	}
3250 
declareFragmentOutput(TIntermTyped * fragmentOutput)3251 	void OutputASM::declareFragmentOutput(TIntermTyped *fragmentOutput)
3252 	{
3253 		int requestedLocation = fragmentOutput->getType().getLayoutQualifier().location;
3254 		int registerCount = fragmentOutput->totalRegisterCount();
3255 		if(requestedLocation < 0)
3256 		{
3257 			ASSERT(requestedLocation == -1); // All other negative values would have been prevented in TParseContext::parseLayoutQualifier
3258 			return; // No requested location
3259 		}
3260 		else if((requestedLocation + registerCount) > sw::RENDERTARGETS)
3261 		{
3262 			mContext.error(fragmentOutput->getLine(), "Fragment output location larger or equal to MAX_DRAW_BUFFERS", "fragment shader");
3263 		}
3264 		else
3265 		{
3266 			int currentIndex = lookup(fragmentOutputs, fragmentOutput);
3267 			if(requestedLocation != currentIndex)
3268 			{
3269 				if(currentIndex != -1)
3270 				{
3271 					mContext.error(fragmentOutput->getLine(), "Multiple locations for fragment output", "fragment shader");
3272 				}
3273 				else
3274 				{
3275 					if(fragmentOutputs.size() <= (size_t)requestedLocation)
3276 					{
3277 						while(fragmentOutputs.size() < (size_t)requestedLocation)
3278 						{
3279 							fragmentOutputs.push_back(nullptr);
3280 						}
3281 						for(int i = 0; i < registerCount; i++)
3282 						{
3283 							fragmentOutputs.push_back(fragmentOutput);
3284 						}
3285 					}
3286 					else
3287 					{
3288 						for(int i = 0; i < registerCount; i++)
3289 						{
3290 							if(!fragmentOutputs[requestedLocation + i])
3291 							{
3292 								fragmentOutputs[requestedLocation + i] = fragmentOutput;
3293 							}
3294 							else
3295 							{
3296 								mContext.error(fragmentOutput->getLine(), "Fragment output location aliasing", "fragment shader");
3297 								return;
3298 							}
3299 						}
3300 					}
3301 				}
3302 			}
3303 		}
3304 	}
3305 
uniformRegister(TIntermTyped * uniform)3306 	int OutputASM::uniformRegister(TIntermTyped *uniform)
3307 	{
3308 		const TType &type = uniform->getType();
3309 		ASSERT(!IsSampler(type.getBasicType()));
3310 		TInterfaceBlock *block = type.getAsInterfaceBlock();
3311 		TIntermSymbol *symbol = uniform->getAsSymbolNode();
3312 		ASSERT(symbol || block);
3313 
3314 		if(symbol || block)
3315 		{
3316 			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
3317 			bool isBlockMember = (!block && parentBlock);
3318 			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
3319 
3320 			if(index == -1 || isBlockMember)
3321 			{
3322 				if(index == -1)
3323 				{
3324 					index = allocate(uniforms, uniform);
3325 					if (index == -1)
3326 					{
3327 						return 0;
3328 					}
3329 				}
3330 
3331 				// Verify if the current uniform is a member of an already declared block
3332 				const TString &name = symbol ? symbol->getSymbol() : block->name();
3333 				int blockMemberIndex = blockMemberLookup(type, name, index);
3334 				if(blockMemberIndex == -1)
3335 				{
3336 					declareUniform(type, name, index, false);
3337 				}
3338 				else
3339 				{
3340 					index = blockMemberIndex;
3341 				}
3342 			}
3343 
3344 			return index;
3345 		}
3346 
3347 		return 0;
3348 	}
3349 
attributeRegister(TIntermTyped * attribute)3350 	int OutputASM::attributeRegister(TIntermTyped *attribute)
3351 	{
3352 		ASSERT(!attribute->isArray());
3353 
3354 		int index = lookup(attributes, attribute);
3355 
3356 		if(index == -1)
3357 		{
3358 			TIntermSymbol *symbol = attribute->getAsSymbolNode();
3359 			ASSERT(symbol);
3360 
3361 			if(symbol)
3362 			{
3363 				index = allocate(attributes, attribute);
3364 				if (index == -1)
3365 				{
3366 					return -1;
3367 				}
3368 				const TType &type = attribute->getType();
3369 				int registerCount = attribute->totalRegisterCount();
3370 				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
3371 				switch(type.getBasicType())
3372 				{
3373 				case EbtInt:
3374 					attribType = sw::VertexShader::ATTRIBTYPE_INT;
3375 					break;
3376 				case EbtUInt:
3377 					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
3378 					break;
3379 				case EbtFloat:
3380 				default:
3381 					break;
3382 				}
3383 
3384 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
3385 				{
3386 					for(int i = 0; i < registerCount; i++)
3387 					{
3388 						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
3389 					}
3390 				}
3391 
3392 				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
3393 
3394 				const char *name = symbol->getSymbol().c_str();
3395 				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
3396 			}
3397 		}
3398 
3399 		return index;
3400 	}
3401 
fragmentOutputRegister(TIntermTyped * fragmentOutput)3402 	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
3403 	{
3404 		return allocate(fragmentOutputs, fragmentOutput);
3405 	}
3406 
samplerRegister(TIntermTyped * sampler)3407 	int OutputASM::samplerRegister(TIntermTyped *sampler)
3408 	{
3409 		const TType &type = sampler->getType();
3410 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3411 
3412 		TIntermSymbol *symbol = sampler->getAsSymbolNode();
3413 		TIntermBinary *binary = sampler->getAsBinaryNode();
3414 
3415 		if(symbol)
3416 		{
3417 			switch(type.getQualifier())
3418 			{
3419 			case EvqUniform:
3420 				return samplerRegister(symbol);
3421 			case EvqIn:
3422 			case EvqConstReadOnly:
3423 				// Function arguments are not (uniform) sampler registers
3424 				return -1;
3425 			default:
3426 				UNREACHABLE(type.getQualifier());
3427 			}
3428 		}
3429 		else if(binary)
3430 		{
3431 			TIntermTyped *left = binary->getLeft();
3432 			TIntermTyped *right = binary->getRight();
3433 			const TType &leftType = left->getType();
3434 			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
3435 			int offset = 0;
3436 
3437 			switch(binary->getOp())
3438 			{
3439 			case EOpIndexDirect:
3440 				ASSERT(left->isArray());
3441 				offset = index * leftType.samplerRegisterCount();
3442 				break;
3443 			case EOpIndexDirectStruct:
3444 				ASSERT(leftType.isStruct());
3445 				{
3446 					const TFieldList &fields = leftType.getStruct()->fields();
3447 
3448 					for(int i = 0; i < index; i++)
3449 					{
3450 						offset += fields[i]->type()->totalSamplerRegisterCount();
3451 					}
3452 				}
3453 				break;
3454 			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
3455 				return -1;
3456 			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
3457 			default:
3458 				UNREACHABLE(binary->getOp());
3459 				return -1;
3460 			}
3461 
3462 			int base = samplerRegister(left);
3463 
3464 			if(base < 0)
3465 			{
3466 				return -1;
3467 			}
3468 
3469 			return base + offset;
3470 		}
3471 
3472 		UNREACHABLE(0);
3473 		return -1;   // Not a (uniform) sampler register
3474 	}
3475 
samplerRegister(TIntermSymbol * sampler)3476 	int OutputASM::samplerRegister(TIntermSymbol *sampler)
3477 	{
3478 		const TType &type = sampler->getType();
3479 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3480 
3481 		int index = lookup(samplers, sampler);
3482 
3483 		if(index == -1)
3484 		{
3485 			index = allocate(samplers, sampler, true);
3486 			if (index == -1)
3487 			{
3488 				return 0;
3489 			}
3490 
3491 			if(sampler->getQualifier() == EvqUniform)
3492 			{
3493 				const char *name = sampler->getSymbol().c_str();
3494 				declareUniform(type, name, index, true);
3495 			}
3496 		}
3497 
3498 		return index;
3499 	}
3500 
isSamplerRegister(TIntermTyped * operand)3501 	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
3502 	{
3503 		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
3504 	}
3505 
arrayExceedsLimits(TIntermTyped * operand)3506 	bool OutputASM::arrayExceedsLimits(TIntermTyped *operand)
3507 	{
3508 		const TVariable *maxUniformVectors = nullptr;
3509 		TString builtinName = "";
3510 		if (vertexShader)
3511 		{
3512 			builtinName = "gl_MaxVertexUniformVectors";
3513 		}
3514 		else if (pixelShader)
3515 		{
3516 			builtinName = "gl_MaxFragmentUniformVectors";
3517 		}
3518 		maxUniformVectors = static_cast<const TVariable *>(mContext.symbolTable.findBuiltIn(builtinName.c_str(), mContext.getShaderVersion()));
3519 		if (operand->getArraySize() > maxUniformVectors->getConstPointer()->getIConst())
3520 		{
3521 			std::stringstream extraInfoStream;
3522 			extraInfoStream << "Array size (" << operand->getArraySize() << ") "
3523 			                << "exceeds limit of " << builtinName
3524 			                << " (" << maxUniformVectors->getConstPointer()->getIConst() << ")";
3525 			std::string errorStr = extraInfoStream.str();
3526 			mContext.error(operand->getLine(), errorStr.c_str(),
3527 			               operand->getBasicString());
3528 			return true;
3529 		}
3530 		return false;
3531 	}
3532 
lookup(VariableArray & list,TIntermTyped * variable)3533 	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
3534 	{
3535 		for(unsigned int i = 0; i < list.size(); i++)
3536 		{
3537 			if(list[i] == variable)
3538 			{
3539 				return i;   // Pointer match
3540 			}
3541 		}
3542 
3543 		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
3544 		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
3545 
3546 		if(varBlock)
3547 		{
3548 			for(unsigned int i = 0; i < list.size(); i++)
3549 			{
3550 				if(list[i])
3551 				{
3552 					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
3553 
3554 					if(listBlock)
3555 					{
3556 						if(listBlock->name() == varBlock->name())
3557 						{
3558 							ASSERT(listBlock->arraySize() == varBlock->arraySize());
3559 							ASSERT(listBlock->fields() == varBlock->fields());
3560 							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
3561 							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
3562 
3563 							return i;
3564 						}
3565 					}
3566 				}
3567 			}
3568 		}
3569 		else if(varSymbol)
3570 		{
3571 			for(unsigned int i = 0; i < list.size(); i++)
3572 			{
3573 				if(list[i])
3574 				{
3575 					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
3576 
3577 					if(listSymbol)
3578 					{
3579 						if(listSymbol->getId() == varSymbol->getId())
3580 						{
3581 							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
3582 							ASSERT(listSymbol->getType() == varSymbol->getType());
3583 							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
3584 
3585 							return i;
3586 						}
3587 					}
3588 				}
3589 			}
3590 		}
3591 
3592 		return -1;
3593 	}
3594 
lookup(VariableArray & list,TInterfaceBlock * block)3595 	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
3596 	{
3597 		for(unsigned int i = 0; i < list.size(); i++)
3598 		{
3599 			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
3600 			{
3601 				return i;   // Pointer match
3602 			}
3603 		}
3604 		return -1;
3605 	}
3606 
allocate(VariableArray & list,TIntermTyped * variable,bool samplersOnly)3607 	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable, bool samplersOnly)
3608 	{
3609 		int index = lookup(list, variable);
3610 
3611 		if(index == -1)
3612 		{
3613 			if (arrayExceedsLimits(variable))
3614 			{
3615 				return -1;
3616 			}
3617 			unsigned int registerCount = variable->blockRegisterCount(samplersOnly);
3618 
3619 			for(unsigned int i = 0; i < list.size(); i++)
3620 			{
3621 				if(list[i] == 0)
3622 				{
3623 					unsigned int j = 1;
3624 					for( ; j < registerCount && (i + j) < list.size(); j++)
3625 					{
3626 						if(list[i + j] != 0)
3627 						{
3628 							break;
3629 						}
3630 					}
3631 
3632 					if(j == registerCount)   // Found free slots
3633 					{
3634 						for(unsigned int j = 0; j < registerCount; j++)
3635 						{
3636 							list[i + j] = variable;
3637 						}
3638 
3639 						return i;
3640 					}
3641 				}
3642 			}
3643 
3644 			index = list.size();
3645 
3646 			for(unsigned int i = 0; i < registerCount; i++)
3647 			{
3648 				list.push_back(variable);
3649 			}
3650 		}
3651 
3652 		return index;
3653 	}
3654 
free(VariableArray & list,TIntermTyped * variable)3655 	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
3656 	{
3657 		int index = lookup(list, variable);
3658 
3659 		if(index >= 0)
3660 		{
3661 			list[index] = 0;
3662 		}
3663 	}
3664 
blockMemberLookup(const TType & type,const TString & name,int registerIndex)3665 	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
3666 	{
3667 		const TInterfaceBlock *block = type.getInterfaceBlock();
3668 
3669 		if(block)
3670 		{
3671 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3672 			const TFieldList& fields = block->fields();
3673 			const TString &blockName = block->name();
3674 			int fieldRegisterIndex = registerIndex;
3675 
3676 			if(!type.isInterfaceBlock())
3677 			{
3678 				// This is a uniform that's part of a block, let's see if the block is already defined
3679 				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
3680 				{
3681 					if(activeUniformBlocks[i].name == blockName.c_str())
3682 					{
3683 						// The block is already defined, find the register for the current uniform and return it
3684 						for(size_t j = 0; j < fields.size(); j++)
3685 						{
3686 							const TString &fieldName = fields[j]->name();
3687 							if(fieldName == name)
3688 							{
3689 								return fieldRegisterIndex;
3690 							}
3691 
3692 							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
3693 						}
3694 
3695 						ASSERT(false);
3696 						return fieldRegisterIndex;
3697 					}
3698 				}
3699 			}
3700 		}
3701 
3702 		return -1;
3703 	}
3704 
declareUniform(const TType & type,const TString & name,int registerIndex,bool samplersOnly,int blockId,BlockLayoutEncoder * encoder)3705 	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, bool samplersOnly, int blockId, BlockLayoutEncoder* encoder)
3706 	{
3707 		const TStructure *structure = type.getStruct();
3708 		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;
3709 
3710 		if(!structure && !block)
3711 		{
3712 			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
3713 			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
3714 			if(blockId >= 0)
3715 			{
3716 				blockDefinitions[blockId].insert(BlockDefinitionIndexMap::value_type(registerIndex, TypedMemberInfo(blockInfo, type)));
3717 				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
3718 			}
3719 			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
3720 			bool isSampler = IsSampler(type.getBasicType());
3721 			if(isSampler && samplersOnly)
3722 			{
3723 				for(int i = 0; i < type.totalRegisterCount(); i++)
3724 				{
3725 					shader->declareSampler(fieldRegisterIndex + i);
3726 				}
3727 			}
3728 			if(isSampler == samplersOnly)
3729 			{
3730 				activeUniforms.push_back(Uniform(type, name.c_str(), fieldRegisterIndex, blockId, blockInfo));
3731 			}
3732 		}
3733 		else if(block)
3734 		{
3735 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3736 			const TFieldList& fields = block->fields();
3737 			const TString &blockName = block->name();
3738 			int fieldRegisterIndex = registerIndex;
3739 			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);
3740 
3741 			blockId = activeUniformBlocks.size();
3742 			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
3743 			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
3744 			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
3745 			blockDefinitions.push_back(BlockDefinitionIndexMap());
3746 
3747 			Std140BlockEncoder currentBlockEncoder;
3748 			currentBlockEncoder.enterAggregateType();
3749 			for(const auto &field : fields)
3750 			{
3751 				const TType &fieldType = *(field->type());
3752 				const TString &fieldName = field->name();
3753 				if(isUniformBlockMember && (fieldName == name))
3754 				{
3755 					registerIndex = fieldRegisterIndex;
3756 				}
3757 
3758 				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;
3759 
3760 				declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, &currentBlockEncoder);
3761 				fieldRegisterIndex += fieldType.totalRegisterCount();
3762 			}
3763 			currentBlockEncoder.exitAggregateType();
3764 			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
3765 		}
3766 		else
3767 		{
3768 			// Store struct for program link time validation
3769 			shaderObject->activeUniformStructs.push_back(Uniform(type, name.c_str(), registerIndex, -1, BlockMemberInfo::getDefaultBlockInfo()));
3770 
3771 			int fieldRegisterIndex = registerIndex;
3772 
3773 			const TFieldList& fields = structure->fields();
3774 			if(type.isArray() && (structure || type.isInterfaceBlock()))
3775 			{
3776 				for(int i = 0; i < type.getArraySize(); i++)
3777 				{
3778 					if(encoder)
3779 					{
3780 						encoder->enterAggregateType();
3781 					}
3782 					for(const auto &field : fields)
3783 					{
3784 						const TType &fieldType = *(field->type());
3785 						const TString &fieldName = field->name();
3786 						const TString uniformName = name + "[" + str(i) + "]." + fieldName;
3787 
3788 						declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
3789 						fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
3790 					}
3791 					if(encoder)
3792 					{
3793 						encoder->exitAggregateType();
3794 					}
3795 				}
3796 			}
3797 			else
3798 			{
3799 				if(encoder)
3800 				{
3801 					encoder->enterAggregateType();
3802 				}
3803 				for(const auto &field : fields)
3804 				{
3805 					const TType &fieldType = *(field->type());
3806 					const TString &fieldName = field->name();
3807 					const TString uniformName = name + "." + fieldName;
3808 
3809 					declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
3810 					fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
3811 				}
3812 				if(encoder)
3813 				{
3814 					encoder->exitAggregateType();
3815 				}
3816 			}
3817 		}
3818 	}
3819 
dim(TIntermNode * v)3820 	int OutputASM::dim(TIntermNode *v)
3821 	{
3822 		TIntermTyped *vector = v->getAsTyped();
3823 		ASSERT(vector && vector->isRegister());
3824 		return vector->getNominalSize();
3825 	}
3826 
dim2(TIntermNode * m)3827 	int OutputASM::dim2(TIntermNode *m)
3828 	{
3829 		TIntermTyped *matrix = m->getAsTyped();
3830 		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
3831 		return matrix->getSecondarySize();
3832 	}
3833 
3834 	// Sets iterations to ~0u if no loop count could be statically determined.
LoopInfo(TIntermLoop * node)3835 	OutputASM::LoopInfo::LoopInfo(TIntermLoop *node)
3836 	{
3837 		// Parse loops of the form:
3838 		// for(int index = initial; index [comparator] limit; index [op] increment)
3839 
3840 		// Parse index name and intial value
3841 		if(node->getInit())
3842 		{
3843 			TIntermAggregate *init = node->getInit()->getAsAggregate();
3844 
3845 			if(init)
3846 			{
3847 				TIntermSequence &sequence = init->getSequence();
3848 				TIntermTyped *variable = sequence[0]->getAsTyped();
3849 
3850 				if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt)
3851 				{
3852 					TIntermBinary *assign = variable->getAsBinaryNode();
3853 
3854 					if(assign && assign->getOp() == EOpInitialize)
3855 					{
3856 						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
3857 						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();
3858 
3859 						if(symbol && constant)
3860 						{
3861 							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3862 							{
3863 								index = symbol;
3864 								initial = constant->getUnionArrayPointer()[0].getIConst();
3865 							}
3866 						}
3867 					}
3868 				}
3869 			}
3870 		}
3871 
3872 		// Parse comparator and limit value
3873 		if(index && node->getCondition())
3874 		{
3875 			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
3876 			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;
3877 
3878 			if(left && (left->getId() == index->getId()))
3879 			{
3880 				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();
3881 
3882 				if(constant)
3883 				{
3884 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3885 					{
3886 						comparator = test->getOp();
3887 						limit = constant->getUnionArrayPointer()[0].getIConst();
3888 					}
3889 				}
3890 			}
3891 		}
3892 
3893 		// Parse increment
3894 		if(index && comparator != EOpNull && node->getExpression())
3895 		{
3896 			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
3897 			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();
3898 
3899 			if(binaryTerminal)
3900 			{
3901 				TIntermSymbol *operand = binaryTerminal->getLeft()->getAsSymbolNode();
3902 
3903 				if(operand && operand->getId() == index->getId())
3904 				{
3905 					TOperator op = binaryTerminal->getOp();
3906 					TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();
3907 
3908 					if(constant)
3909 					{
3910 						if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3911 						{
3912 							int value = constant->getUnionArrayPointer()[0].getIConst();
3913 
3914 							switch(op)
3915 							{
3916 							case EOpAddAssign: increment = value;  break;
3917 							case EOpSubAssign: increment = -value; break;
3918 							default:           increment = 0;      break;   // Rare cases left unhandled. Treated as non-deterministic.
3919 							}
3920 						}
3921 					}
3922 				}
3923 			}
3924 			else if(unaryTerminal)
3925 			{
3926 				TIntermSymbol *operand = unaryTerminal->getOperand()->getAsSymbolNode();
3927 
3928 				if(operand && operand->getId() == index->getId())
3929 				{
3930 					TOperator op = unaryTerminal->getOp();
3931 
3932 					switch(op)
3933 					{
3934 					case EOpPostIncrement: increment = 1;  break;
3935 					case EOpPostDecrement: increment = -1; break;
3936 					case EOpPreIncrement:  increment = 1;  break;
3937 					case EOpPreDecrement:  increment = -1; break;
3938 					default:               increment = 0;  break;   // Rare cases left unhandled. Treated as non-deterministic.
3939 					}
3940 				}
3941 			}
3942 		}
3943 
3944 		if(index && comparator != EOpNull && increment != 0)
3945 		{
3946 			// Check the loop body for return statements or changes to the index variable that make it non-deterministic.
3947 			LoopUnrollable loopUnrollable;
3948 			bool unrollable = loopUnrollable.traverse(node, index->getId());
3949 
3950 			if(!unrollable)
3951 			{
3952 				iterations = ~0u;
3953 				return;
3954 			}
3955 
3956 			if(comparator == EOpLessThanEqual)
3957 			{
3958 				comparator = EOpLessThan;
3959 				limit += 1;
3960 			}
3961 			else if(comparator == EOpGreaterThanEqual)
3962 			{
3963 				comparator = EOpLessThan;
3964 				limit -= 1;
3965 				std::swap(initial, limit);
3966 				increment = -increment;
3967 			}
3968 			else if(comparator == EOpGreaterThan)
3969 			{
3970 				comparator = EOpLessThan;
3971 				std::swap(initial, limit);
3972 				increment = -increment;
3973 			}
3974 
3975 			if(comparator == EOpLessThan)
3976 			{
3977 				if(!(initial < limit))   // Never loops
3978 				{
3979 					iterations = 0;
3980 				}
3981 				else if(increment < 0)
3982 				{
3983 					iterations = ~0u;
3984 				}
3985 				else
3986 				{
3987 					iterations = (limit - initial + abs(increment) - 1) / increment;   // Ceiling division
3988 				}
3989 			}
3990 			else
3991 			{
3992 				// Rare cases left unhandled. Treated as non-deterministic.
3993 				iterations = ~0u;
3994 			}
3995 		}
3996 	}
3997 
traverse(TIntermLoop * loop,int indexId)3998 	bool LoopUnrollable::traverse(TIntermLoop *loop, int indexId)
3999 	{
4000 		loopUnrollable = true;
4001 
4002 		loopIndexId = indexId;
4003 		TIntermNode *body = loop->getBody();
4004 
4005 		if(body)
4006 		{
4007 			body->traverse(this);
4008 		}
4009 
4010 		return loopUnrollable;
4011 	}
4012 
visitSymbol(TIntermSymbol * node)4013 	void LoopUnrollable::visitSymbol(TIntermSymbol *node)
4014 	{
4015 		// Check that the loop index is not used as the argument to a function out or inout parameter.
4016 		if(node->getId() == loopIndexId)
4017 		{
4018 			if(node->getQualifier() == EvqOut || node->getQualifier() == EvqInOut)
4019 			{
4020 				loopUnrollable = false;
4021 			}
4022 		}
4023 	}
4024 
visitBinary(Visit visit,TIntermBinary * node)4025 	bool LoopUnrollable::visitBinary(Visit visit, TIntermBinary *node)
4026 	{
4027 		if(!loopUnrollable)
4028 		{
4029 			return false;
4030 		}
4031 
4032 		// Check that the loop index is not statically assigned to.
4033 		TIntermSymbol *symbol = node->getLeft()->getAsSymbolNode();
4034 		loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId));
4035 
4036 		return loopUnrollable;
4037 	}
4038 
visitUnary(Visit visit,TIntermUnary * node)4039 	bool LoopUnrollable::visitUnary(Visit visit, TIntermUnary *node)
4040 	{
4041 		if(!loopUnrollable)
4042 		{
4043 			return false;
4044 		}
4045 
4046 		// Check that the loop index is not statically assigned to.
4047 		TIntermSymbol *symbol = node->getOperand()->getAsSymbolNode();
4048 		loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId));
4049 
4050 		return loopUnrollable;
4051 	}
4052 
visitBranch(Visit visit,TIntermBranch * node)4053 	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
4054 	{
4055 		if(!loopUnrollable)
4056 		{
4057 			return false;
4058 		}
4059 
4060 		switch(node->getFlowOp())
4061 		{
4062 		case EOpKill:
4063 		case EOpReturn:
4064 		case EOpBreak:
4065 		case EOpContinue:
4066 			loopUnrollable = false;
4067 			break;
4068 		default: UNREACHABLE(node->getFlowOp());
4069 		}
4070 
4071 		return loopUnrollable;
4072 	}
4073 
visitAggregate(Visit visit,TIntermAggregate * node)4074 	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
4075 	{
4076 		return loopUnrollable;
4077 	}
4078 }
4079