1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "OutputASM.h"
16 #include "Common/Math.hpp"
17 
18 #include "common/debug.h"
19 #include "InfoSink.h"
20 
21 #include "libGLESv2/Shader.h"
22 
23 #include <GLES2/gl2.h>
24 #include <GLES2/gl2ext.h>
25 #include <GLES3/gl3.h>
26 #include <GL/glcorearb.h>
27 #include <GL/glext.h>
28 
29 #include <stdlib.h>
30 
31 namespace
32 {
glVariableType(const TType & type)33 	GLenum glVariableType(const TType &type)
34 	{
35 		switch(type.getBasicType())
36 		{
37 		case EbtFloat:
38 			if(type.isScalar())
39 			{
40 				return GL_FLOAT;
41 			}
42 			else if(type.isVector())
43 			{
44 				switch(type.getNominalSize())
45 				{
46 				case 2: return GL_FLOAT_VEC2;
47 				case 3: return GL_FLOAT_VEC3;
48 				case 4: return GL_FLOAT_VEC4;
49 				default: UNREACHABLE(type.getNominalSize());
50 				}
51 			}
52 			else if(type.isMatrix())
53 			{
54 				switch(type.getNominalSize())
55 				{
56 				case 2:
57 					switch(type.getSecondarySize())
58 					{
59 					case 2: return GL_FLOAT_MAT2;
60 					case 3: return GL_FLOAT_MAT2x3;
61 					case 4: return GL_FLOAT_MAT2x4;
62 					default: UNREACHABLE(type.getSecondarySize());
63 					}
64 				case 3:
65 					switch(type.getSecondarySize())
66 					{
67 					case 2: return GL_FLOAT_MAT3x2;
68 					case 3: return GL_FLOAT_MAT3;
69 					case 4: return GL_FLOAT_MAT3x4;
70 					default: UNREACHABLE(type.getSecondarySize());
71 					}
72 				case 4:
73 					switch(type.getSecondarySize())
74 					{
75 					case 2: return GL_FLOAT_MAT4x2;
76 					case 3: return GL_FLOAT_MAT4x3;
77 					case 4: return GL_FLOAT_MAT4;
78 					default: UNREACHABLE(type.getSecondarySize());
79 					}
80 				default: UNREACHABLE(type.getNominalSize());
81 				}
82 			}
83 			else UNREACHABLE(0);
84 			break;
85 		case EbtInt:
86 			if(type.isScalar())
87 			{
88 				return GL_INT;
89 			}
90 			else if(type.isVector())
91 			{
92 				switch(type.getNominalSize())
93 				{
94 				case 2: return GL_INT_VEC2;
95 				case 3: return GL_INT_VEC3;
96 				case 4: return GL_INT_VEC4;
97 				default: UNREACHABLE(type.getNominalSize());
98 				}
99 			}
100 			else UNREACHABLE(0);
101 			break;
102 		case EbtUInt:
103 			if(type.isScalar())
104 			{
105 				return GL_UNSIGNED_INT;
106 			}
107 			else if(type.isVector())
108 			{
109 				switch(type.getNominalSize())
110 				{
111 				case 2: return GL_UNSIGNED_INT_VEC2;
112 				case 3: return GL_UNSIGNED_INT_VEC3;
113 				case 4: return GL_UNSIGNED_INT_VEC4;
114 				default: UNREACHABLE(type.getNominalSize());
115 				}
116 			}
117 			else UNREACHABLE(0);
118 			break;
119 		case EbtBool:
120 			if(type.isScalar())
121 			{
122 				return GL_BOOL;
123 			}
124 			else if(type.isVector())
125 			{
126 				switch(type.getNominalSize())
127 				{
128 				case 2: return GL_BOOL_VEC2;
129 				case 3: return GL_BOOL_VEC3;
130 				case 4: return GL_BOOL_VEC4;
131 				default: UNREACHABLE(type.getNominalSize());
132 				}
133 			}
134 			else UNREACHABLE(0);
135 			break;
136 		case EbtSampler2D:
137 			return GL_SAMPLER_2D;
138 		case EbtISampler2D:
139 			return GL_INT_SAMPLER_2D;
140 		case EbtUSampler2D:
141 			return GL_UNSIGNED_INT_SAMPLER_2D;
142 		case EbtSamplerCube:
143 			return GL_SAMPLER_CUBE;
144 		case EbtSampler2DRect:
145 			return GL_SAMPLER_2D_RECT_ARB;
146 		case EbtISamplerCube:
147 			return GL_INT_SAMPLER_CUBE;
148 		case EbtUSamplerCube:
149 			return GL_UNSIGNED_INT_SAMPLER_CUBE;
150 		case EbtSamplerExternalOES:
151 			return GL_SAMPLER_EXTERNAL_OES;
152 		case EbtSampler3D:
153 			return GL_SAMPLER_3D_OES;
154 		case EbtISampler3D:
155 			return GL_INT_SAMPLER_3D;
156 		case EbtUSampler3D:
157 			return GL_UNSIGNED_INT_SAMPLER_3D;
158 		case EbtSampler2DArray:
159 			return GL_SAMPLER_2D_ARRAY;
160 		case EbtISampler2DArray:
161 			return GL_INT_SAMPLER_2D_ARRAY;
162 		case EbtUSampler2DArray:
163 			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
164 		case EbtSampler2DShadow:
165 			return GL_SAMPLER_2D_SHADOW;
166 		case EbtSamplerCubeShadow:
167 			return GL_SAMPLER_CUBE_SHADOW;
168 		case EbtSampler2DArrayShadow:
169 			return GL_SAMPLER_2D_ARRAY_SHADOW;
170 		default:
171 			UNREACHABLE(type.getBasicType());
172 			break;
173 		}
174 
175 		return GL_NONE;
176 	}
177 
glVariablePrecision(const TType & type)178 	GLenum glVariablePrecision(const TType &type)
179 	{
180 		if(type.getBasicType() == EbtFloat)
181 		{
182 			switch(type.getPrecision())
183 			{
184 			case EbpHigh:   return GL_HIGH_FLOAT;
185 			case EbpMedium: return GL_MEDIUM_FLOAT;
186 			case EbpLow:    return GL_LOW_FLOAT;
187 			case EbpUndefined:
188 				// Should be defined as the default precision by the parser
189 			default: UNREACHABLE(type.getPrecision());
190 			}
191 		}
192 		else if(type.getBasicType() == EbtInt)
193 		{
194 			switch(type.getPrecision())
195 			{
196 			case EbpHigh:   return GL_HIGH_INT;
197 			case EbpMedium: return GL_MEDIUM_INT;
198 			case EbpLow:    return GL_LOW_INT;
199 			case EbpUndefined:
200 				// Should be defined as the default precision by the parser
201 			default: UNREACHABLE(type.getPrecision());
202 			}
203 		}
204 
205 		// Other types (boolean, sampler) don't have a precision
206 		return GL_NONE;
207 	}
208 }
209 
210 namespace glsl
211 {
212 	// Integer to TString conversion
str(int i)213 	TString str(int i)
214 	{
215 		char buffer[20];
216 		sprintf(buffer, "%d", i);
217 		return buffer;
218 	}
219 
220 	class Temporary : public TIntermSymbol
221 	{
222 	public:
Temporary(OutputASM * assembler)223 		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
224 		{
225 		}
226 
~Temporary()227 		~Temporary()
228 		{
229 			assembler->freeTemporary(this);
230 		}
231 
232 	private:
233 		OutputASM *const assembler;
234 	};
235 
236 	class Constant : public TIntermConstantUnion
237 	{
238 	public:
Constant(float x,float y,float z,float w)239 		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
240 		{
241 			constants[0].setFConst(x);
242 			constants[1].setFConst(y);
243 			constants[2].setFConst(z);
244 			constants[3].setFConst(w);
245 		}
246 
Constant(bool b)247 		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
248 		{
249 			constants[0].setBConst(b);
250 		}
251 
Constant(int i)252 		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
253 		{
254 			constants[0].setIConst(i);
255 		}
256 
~Constant()257 		~Constant()
258 		{
259 		}
260 
261 	private:
262 		ConstantUnion constants[4];
263 	};
264 
ShaderVariable(const TType & type,const std::string & name,int registerIndex)265 	ShaderVariable::ShaderVariable(const TType& type, const std::string& name, int registerIndex) :
266 		type(type.isStruct() ? GL_NONE : glVariableType(type)), precision(glVariablePrecision(type)),
267 		name(name), arraySize(type.getArraySize()), registerIndex(registerIndex)
268 	{
269 		if(type.isStruct())
270 		{
271 			for(const auto& field : type.getStruct()->fields())
272 			{
273 				fields.push_back(ShaderVariable(*(field->type()), field->name().c_str(), -1));
274 			}
275 		}
276 	}
277 
Uniform(const TType & type,const std::string & name,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)278 	Uniform::Uniform(const TType& type, const std::string &name, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
279 		ShaderVariable(type, name, registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
280 	{
281 	}
282 
UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)283 	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
284 	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
285 		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
286 		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
287 	{
288 	}
289 
BlockLayoutEncoder()290 	BlockLayoutEncoder::BlockLayoutEncoder()
291 		: mCurrentOffset(0)
292 	{
293 	}
294 
encodeType(const TType & type)295 	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
296 	{
297 		int arrayStride;
298 		int matrixStride;
299 
300 		bool isRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor;
301 		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
302 
303 		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
304 		                                 static_cast<int>(arrayStride * BytesPerComponent),
305 		                                 static_cast<int>(matrixStride * BytesPerComponent),
306 		                                 (matrixStride > 0) && isRowMajor);
307 
308 		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
309 
310 		return memberInfo;
311 	}
312 
313 	// static
getBlockRegister(const BlockMemberInfo & info)314 	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
315 	{
316 		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
317 	}
318 
319 	// static
getBlockRegisterElement(const BlockMemberInfo & info)320 	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
321 	{
322 		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
323 	}
324 
nextRegister()325 	void BlockLayoutEncoder::nextRegister()
326 	{
327 		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
328 	}
329 
Std140BlockEncoder()330 	Std140BlockEncoder::Std140BlockEncoder() : BlockLayoutEncoder()
331 	{
332 	}
333 
enterAggregateType()334 	void Std140BlockEncoder::enterAggregateType()
335 	{
336 		nextRegister();
337 	}
338 
exitAggregateType()339 	void Std140BlockEncoder::exitAggregateType()
340 	{
341 		nextRegister();
342 	}
343 
getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)344 	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
345 	{
346 		size_t baseAlignment = 0;
347 		int matrixStride = 0;
348 		int arrayStride = 0;
349 
350 		if(type.isMatrix())
351 		{
352 			baseAlignment = ComponentsPerRegister;
353 			matrixStride = ComponentsPerRegister;
354 
355 			if(arraySize > 0)
356 			{
357 				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
358 				arrayStride = ComponentsPerRegister * numRegisters;
359 			}
360 		}
361 		else if(arraySize > 0)
362 		{
363 			baseAlignment = ComponentsPerRegister;
364 			arrayStride = ComponentsPerRegister;
365 		}
366 		else
367 		{
368 			const size_t numComponents = type.getElementSize();
369 			baseAlignment = (numComponents == 3 ? 4u : numComponents);
370 		}
371 
372 		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
373 
374 		*matrixStrideOut = matrixStride;
375 		*arrayStrideOut = arrayStride;
376 	}
377 
advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)378 	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
379 	{
380 		if(arraySize > 0)
381 		{
382 			mCurrentOffset += arrayStride * arraySize;
383 		}
384 		else if(type.isMatrix())
385 		{
386 			ASSERT(matrixStride == ComponentsPerRegister);
387 			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
388 			mCurrentOffset += ComponentsPerRegister * numRegisters;
389 		}
390 		else
391 		{
392 			mCurrentOffset += type.getElementSize();
393 		}
394 	}
395 
Attribute()396 	Attribute::Attribute()
397 	{
398 		type = GL_NONE;
399 		arraySize = 0;
400 		registerIndex = 0;
401 	}
402 
Attribute(GLenum type,const std::string & name,int arraySize,int layoutLocation,int registerIndex)403 	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int layoutLocation, int registerIndex)
404 	{
405 		this->type = type;
406 		this->name = name;
407 		this->arraySize = arraySize;
408 		this->layoutLocation = layoutLocation;
409 		this->registerIndex = registerIndex;
410 	}
411 
getPixelShader() const412 	sw::PixelShader *Shader::getPixelShader() const
413 	{
414 		return nullptr;
415 	}
416 
getVertexShader() const417 	sw::VertexShader *Shader::getVertexShader() const
418 	{
419 		return nullptr;
420 	}
421 
TextureFunction(const TString & nodeName)422 	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
423 	{
424 		TString name = TFunction::unmangleName(nodeName);
425 
426 		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D" || name == "texture2DRect")
427 		{
428 			method = IMPLICIT;
429 		}
430 		else if(name == "texture2DProj" || name == "textureProj" || name == "texture2DRectProj")
431 		{
432 			method = IMPLICIT;
433 			proj = true;
434 		}
435 		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
436 		{
437 			method = LOD;
438 		}
439 		else if(name == "texture2DProjLod" || name == "textureProjLod")
440 		{
441 			method = LOD;
442 			proj = true;
443 		}
444 		else if(name == "textureSize")
445 		{
446 			method = SIZE;
447 		}
448 		else if(name == "textureOffset")
449 		{
450 			method = IMPLICIT;
451 			offset = true;
452 		}
453 		else if(name == "textureProjOffset")
454 		{
455 			method = IMPLICIT;
456 			offset = true;
457 			proj = true;
458 		}
459 		else if(name == "textureLodOffset")
460 		{
461 			method = LOD;
462 			offset = true;
463 		}
464 		else if(name == "textureProjLodOffset")
465 		{
466 			method = LOD;
467 			proj = true;
468 			offset = true;
469 		}
470 		else if(name == "texelFetch")
471 		{
472 			method = FETCH;
473 		}
474 		else if(name == "texelFetchOffset")
475 		{
476 			method = FETCH;
477 			offset = true;
478 		}
479 		else if(name == "textureGrad")
480 		{
481 			method = GRAD;
482 		}
483 		else if(name == "textureGradOffset")
484 		{
485 			method = GRAD;
486 			offset = true;
487 		}
488 		else if(name == "textureProjGrad")
489 		{
490 			method = GRAD;
491 			proj = true;
492 		}
493 		else if(name == "textureProjGradOffset")
494 		{
495 			method = GRAD;
496 			proj = true;
497 			offset = true;
498 		}
499 		else UNREACHABLE(0);
500 	}
501 
OutputASM(TParseContext & context,Shader * shaderObject)502 	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
503 	{
504 		shader = nullptr;
505 		pixelShader = nullptr;
506 		vertexShader = nullptr;
507 
508 		if(shaderObject)
509 		{
510 			shader = shaderObject->getShader();
511 			pixelShader = shaderObject->getPixelShader();
512 			vertexShader = shaderObject->getVertexShader();
513 		}
514 
515 		functionArray.push_back(Function(0, "main(", nullptr, nullptr));
516 		currentFunction = 0;
517 		outputQualifier = EvqOutput;   // Initialize outputQualifier to any value other than EvqFragColor or EvqFragData
518 	}
519 
~OutputASM()520 	OutputASM::~OutputASM()
521 	{
522 	}
523 
output()524 	void OutputASM::output()
525 	{
526 		if(shader)
527 		{
528 			emitShader(GLOBAL);
529 
530 			if(functionArray.size() > 1)   // Only call main() when there are other functions
531 			{
532 				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
533 				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
534 				callMain->dst.index = 0;   // main()
535 
536 				emit(sw::Shader::OPCODE_RET);
537 			}
538 
539 			emitShader(FUNCTION);
540 		}
541 	}
542 
emitShader(Scope scope)543 	void OutputASM::emitShader(Scope scope)
544 	{
545 		emitScope = scope;
546 		currentScope = GLOBAL;
547 		mContext.getTreeRoot()->traverse(this);
548 	}
549 
freeTemporary(Temporary * temporary)550 	void OutputASM::freeTemporary(Temporary *temporary)
551 	{
552 		free(temporaries, temporary);
553 	}
554 
getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const555 	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
556 	{
557 		TBasicType baseType = in->getType().getBasicType();
558 
559 		switch(op)
560 		{
561 		case sw::Shader::OPCODE_NEG:
562 			switch(baseType)
563 			{
564 			case EbtInt:
565 			case EbtUInt:
566 				return sw::Shader::OPCODE_INEG;
567 			case EbtFloat:
568 			default:
569 				return op;
570 			}
571 		case sw::Shader::OPCODE_ABS:
572 			switch(baseType)
573 			{
574 			case EbtInt:
575 				return sw::Shader::OPCODE_IABS;
576 			case EbtFloat:
577 			default:
578 				return op;
579 			}
580 		case sw::Shader::OPCODE_SGN:
581 			switch(baseType)
582 			{
583 			case EbtInt:
584 				return sw::Shader::OPCODE_ISGN;
585 			case EbtFloat:
586 			default:
587 				return op;
588 			}
589 		case sw::Shader::OPCODE_ADD:
590 			switch(baseType)
591 			{
592 			case EbtInt:
593 			case EbtUInt:
594 				return sw::Shader::OPCODE_IADD;
595 			case EbtFloat:
596 			default:
597 				return op;
598 			}
599 		case sw::Shader::OPCODE_SUB:
600 			switch(baseType)
601 			{
602 			case EbtInt:
603 			case EbtUInt:
604 				return sw::Shader::OPCODE_ISUB;
605 			case EbtFloat:
606 			default:
607 				return op;
608 			}
609 		case sw::Shader::OPCODE_MUL:
610 			switch(baseType)
611 			{
612 			case EbtInt:
613 			case EbtUInt:
614 				return sw::Shader::OPCODE_IMUL;
615 			case EbtFloat:
616 			default:
617 				return op;
618 			}
619 		case sw::Shader::OPCODE_DIV:
620 			switch(baseType)
621 			{
622 			case EbtInt:
623 				return sw::Shader::OPCODE_IDIV;
624 			case EbtUInt:
625 				return sw::Shader::OPCODE_UDIV;
626 			case EbtFloat:
627 			default:
628 				return op;
629 			}
630 		case sw::Shader::OPCODE_IMOD:
631 			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
632 		case sw::Shader::OPCODE_ISHR:
633 			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
634 		case sw::Shader::OPCODE_MIN:
635 			switch(baseType)
636 			{
637 			case EbtInt:
638 				return sw::Shader::OPCODE_IMIN;
639 			case EbtUInt:
640 				return sw::Shader::OPCODE_UMIN;
641 			case EbtFloat:
642 			default:
643 				return op;
644 			}
645 		case sw::Shader::OPCODE_MAX:
646 			switch(baseType)
647 			{
648 			case EbtInt:
649 				return sw::Shader::OPCODE_IMAX;
650 			case EbtUInt:
651 				return sw::Shader::OPCODE_UMAX;
652 			case EbtFloat:
653 			default:
654 				return op;
655 			}
656 		default:
657 			return op;
658 		}
659 	}
660 
visitSymbol(TIntermSymbol * symbol)661 	void OutputASM::visitSymbol(TIntermSymbol *symbol)
662 	{
663 		// The type of vertex outputs and fragment inputs with the same name must match (validated at link time),
664 		// so declare them but don't assign a register index yet (one will be assigned when referenced in reachable code).
665 		switch(symbol->getQualifier())
666 		{
667 		case EvqVaryingIn:
668 		case EvqVaryingOut:
669 		case EvqInvariantVaryingIn:
670 		case EvqInvariantVaryingOut:
671 		case EvqVertexOut:
672 		case EvqFragmentIn:
673 			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
674 			{
675 				declareVarying(symbol, -1);
676 			}
677 			break;
678 		case EvqFragmentOut:
679 			declareFragmentOutput(symbol);
680 			break;
681 		default:
682 			break;
683 		}
684 
685 		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
686 		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
687 		// "All members of a named uniform block declared with a shared or std140 layout qualifier
688 		// are considered active, even if they are not referenced in any shader in the program.
689 		// The uniform block itself is also considered active, even if no member of the block is referenced."
690 		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
691 		{
692 			uniformRegister(symbol);
693 		}
694 	}
695 
	// Emits the instructions for a binary operator node.
	//
	// 'visit' tells at which point of the traversal the node is being seen
	// (PreVisit, InVisit or PostVisit). Operators that need full control over
	// the evaluation order of their operands handle everything at PreVisit,
	// traverse the operands themselves and return false. All other operators
	// emit their instruction at PostVisit (the short-circuit logical operators
	// additionally at InVisit) and return true.
	// Nothing is emitted, and false is returned, for nodes outside the scope
	// currently being emitted and for nodes that denote a sampler register.
	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
	{
		// Only emit code for the scope selected by emitShader()
		if(currentScope != emitScope)
		{
			return false;
		}

		// The node itself serves as the destination of the operation
		TIntermTyped *result = node;
		TIntermTyped *left = node->getLeft();
		TIntermTyped *right = node->getRight();
		const TType &leftType = left->getType();
		const TType &rightType = right->getType();

		if(isSamplerRegister(result))
		{
			return false;   // Don't traverse, the register index is determined statically
		}

		switch(node->getOp())
		{
		case EOpAssign:
			assert(visit == PreVisit);
			// Evaluate the right-hand side, store it to the l-value, and make the
			// assigned value available as the result of the expression
			right->traverse(this);
			assignLvalue(left, right);
			copy(result, right);
			return false;
		case EOpInitialize:
			assert(visit == PreVisit);
			// Constant arrays go into the constant register file.
			if(leftType.getQualifier() == EvqConstExpr && leftType.isArray() && leftType.getArraySize() > 1)
			{
				// One DEF instruction per register of the array
				for(int i = 0; i < left->totalRegisterCount(); i++)
				{
					emit(sw::Shader::OPCODE_DEF, left, i, right, i);
				}
			}
			else
			{
				right->traverse(this);
				copy(left, right);
			}
			return false;
		case EOpMatrixTimesScalarAssign:
			assert(visit == PreVisit);
			right->traverse(this);
			// Scale one register of the matrix per iteration
			for(int i = 0; i < leftType.getNominalSize(); i++)
			{
				emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
			}

			assignLvalue(left, result);
			return false;
		case EOpVectorTimesMatrixAssign:
			assert(visit == PreVisit);
			{
				// The left operand may contain a swizzle serving double-duty as
				// swizzle and writemask, so it's important that we traverse it
				// first. Otherwise we may end up never setting up our left
				// operand correctly.
				left->traverse(this);
				right->traverse(this);
				int size = leftType.getNominalSize();

				// Component i of the result is the dot product of the vector with
				// register i of the matrix
				for(int i = 0; i < size; i++)
				{
					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
					dot->dst.mask = 1 << i;   // Write component i only
				}

				assignLvalue(left, result);
			}
			return false;
		case EOpMatrixTimesMatrixAssign:
			assert(visit == PreVisit);
			{
				right->traverse(this);
				int dim = leftType.getNominalSize();

				// Register i of the result accumulates left register j scaled by
				// component j of right register i
				for(int i = 0; i < dim; i++)
				{
					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
					mul->src[1].swizzle = 0x00;   // Replicate component 0

					for(int j = 1; j < dim; j++)
					{
						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
						mad->src[1].swizzle = j * 0x55;   // Replicate component j into all four 2-bit swizzle fields
					}
				}

				assignLvalue(left, result);
			}
			return false;
		case EOpIndexDirect:
		case EOpIndexIndirect:
		case EOpIndexDirectStruct:
		case EOpIndexDirectInterfaceBlock:
			assert(visit == PreVisit);
			evaluateRvalue(node);
			return false;
		case EOpVectorSwizzle:
			if(visit == PostVisit)
			{
				int swizzle = 0;
				// The right operand is expected to be an aggregate of constant component indices
				TIntermAggregate *components = right->getAsAggregate();

				if(components)
				{
					TIntermSequence &sequence = components->getSequence();
					int component = 0;

					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
					{
						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();

						if(element)
						{
							// Pack each selected component index into its own 2-bit field
							int i = element->getUnionArrayPointer()[0].getIConst();
							swizzle |= i << (component * 2);
							component++;
						}
						else UNREACHABLE(0);
					}
				}
				else UNREACHABLE(0);

				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
				mov->src[0].swizzle = swizzle;
			}
			break;
		// Arithmetic, shift and bitwise operators: the compound-assignment forms
		// go through emitAssign(), the plain forms through emitBinary(). Where an
		// integer variant of the opcode exists it is selected by getOpcode().
		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
		case EOpEqual:
			if(visit == PostVisit)
			{
				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);

				// Operands spanning several registers: compare each further register
				// (index 1 and up) into a temporary and AND it into the result
				for(int index = 1; index < left->totalRegisterCount(); index++)
				{
					Temporary equal(this);
					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
					emit(sw::Shader::OPCODE_AND, result, result, &equal);
				}
			}
			break;
		case EOpNotEqual:
			if(visit == PostVisit)
			{
				emitBinary(sw::Shader::OPCODE_NE, result, left, right);

				// Same scheme as EOpEqual, but a difference in any register suffices (OR)
				for(int index = 1; index < left->totalRegisterCount(); index++)
				{
					Temporary notEqual(this);
					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
				}
			}
			break;
		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
		// Note: for the vector-times-scalar forms the opcode is selected from the type of the left operand
		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
		case EOpMatrixTimesScalar:
			if(visit == PostVisit)
			{
				// Either operand may be the matrix; scale it one register at a time
				if(left->isMatrix())
				{
					for(int i = 0; i < leftType.getNominalSize(); i++)
					{
						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
					}
				}
				else if(right->isMatrix())
				{
					for(int i = 0; i < rightType.getNominalSize(); i++)
					{
						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
					}
				}
				else UNREACHABLE(0);
			}
			break;
		case EOpVectorTimesMatrix:
			if(visit == PostVisit)
			{
				// The dot product width follows the size of the vector operand
				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());

				int size = rightType.getNominalSize();
				for(int i = 0; i < size; i++)
				{
					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
					dot->dst.mask = 1 << i;   // Write component i only
				}
			}
			break;
		case EOpMatrixTimesVector:
			if(visit == PostVisit)
			{
				// result = left[0] * right.x, then accumulate left[i] * right[i]
				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
				mul->src[1].swizzle = 0x00;   // Replicate component 0

				int size = rightType.getNominalSize();
				for(int i = 1; i < size; i++)
				{
					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
					mad->src[1].swizzle = i * 0x55;   // Replicate component i into all four 2-bit swizzle fields
				}
			}
			break;
		case EOpMatrixTimesMatrix:
			if(visit == PostVisit)
			{
				int dim = leftType.getNominalSize();

				// One result register per register of the right operand
				int size = rightType.getNominalSize();
				for(int i = 0; i < size; i++)
				{
					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
					mul->src[1].swizzle = 0x00;   // Replicate component 0

					for(int j = 1; j < dim; j++)
					{
						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
						mad->src[1].swizzle = j * 0x55;   // Replicate component j into all four 2-bit swizzle fields
					}
				}
			}
			break;
		case EOpLogicalOr:
			// NOTE(review): trivial(right, 6) presumably decides whether the right operand
			// is cheap enough to evaluate unconditionally - confirm against trivial()
			if(trivial(right, 6))
			{
				if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_OR, result, left, right);
				}
			}
			else   // Short-circuit evaluation
			{
				if(visit == InVisit)
				{
					// Between the operands: take the left value and open an
					// "if not result" block around the code of the right operand
					emit(sw::Shader::OPCODE_MOV, result, left);
					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
				}
				else if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_MOV, result, right);
					emit(sw::Shader::OPCODE_ENDIF);
				}
			}
			break;
		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
		case EOpLogicalAnd:
			if(trivial(right, 6))
			{
				if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_AND, result, left, right);
				}
			}
			else   // Short-circuit evaluation
			{
				if(visit == InVisit)
				{
					// Between the operands: take the left value and open an
					// "if result" block around the code of the right operand
					emit(sw::Shader::OPCODE_MOV, result, left);
					emit(sw::Shader::OPCODE_IF, 0, result);
				}
				else if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_MOV, result, right);
					emit(sw::Shader::OPCODE_ENDIF);
				}
			}
			break;
		default: UNREACHABLE(node->getOp());
		}

		return true;
	}
995 
emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)996 	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
997 	{
998 		switch(size)
999 		{
1000 		case 1: // Used for cofactor computation only
1001 			{
1002 				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1003 				bool isMov = (row == col);
1004 				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
1005 				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
1006 				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
1007 				mov->dst.mask = 1 << outRow;
1008 			}
1009 			break;
1010 		case 2:
1011 			{
1012 				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
1013 
1014 				bool isCofactor = (col >= 0) && (row >= 0);
1015 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
1016 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
1017 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
1018 
1019 				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
1020 				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
1021 				det->dst.mask = 1 << outRow;
1022 			}
1023 			break;
1024 		case 3:
1025 			{
1026 				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
1027 
1028 				bool isCofactor = (col >= 0) && (row >= 0);
1029 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
1030 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
1031 				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
1032 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
1033 
1034 				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
1035 				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
1036 				det->dst.mask = 1 << outRow;
1037 			}
1038 			break;
1039 		case 4:
1040 			{
1041 				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
1042 				det->dst.mask = 1 << outRow;
1043 			}
1044 			break;
1045 		default:
1046 			UNREACHABLE(size);
1047 			break;
1048 		}
1049 	}
1050 
visitUnary(Visit visit,TIntermUnary * node)1051 	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
1052 	{
1053 		if(currentScope != emitScope)
1054 		{
1055 			return false;
1056 		}
1057 
1058 		TIntermTyped *result = node;
1059 		TIntermTyped *arg = node->getOperand();
1060 		TBasicType basicType = arg->getType().getBasicType();
1061 
1062 		union
1063 		{
1064 			float f;
1065 			int i;
1066 		} one_value;
1067 
1068 		if(basicType == EbtInt || basicType == EbtUInt)
1069 		{
1070 			one_value.i = 1;
1071 		}
1072 		else
1073 		{
1074 			one_value.f = 1.0f;
1075 		}
1076 
1077 		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
1078 		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
1079 		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
1080 
1081 		switch(node->getOp())
1082 		{
1083 		case EOpNegative:
1084 			if(visit == PostVisit)
1085 			{
1086 				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
1087 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1088 				{
1089 					emit(negOpcode, result, index, arg, index);
1090 				}
1091 			}
1092 			break;
1093 		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1094 		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1095 		case EOpBitwiseNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1096 		case EOpPostIncrement:
1097 			if(visit == PostVisit)
1098 			{
1099 				copy(result, arg);
1100 
1101 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
1102 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1103 				{
1104 					emit(addOpcode, arg, index, arg, index, &one);
1105 				}
1106 
1107 				assignLvalue(arg, arg);
1108 			}
1109 			break;
1110 		case EOpPostDecrement:
1111 			if(visit == PostVisit)
1112 			{
1113 				copy(result, arg);
1114 
1115 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
1116 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1117 				{
1118 					emit(subOpcode, arg, index, arg, index, &one);
1119 				}
1120 
1121 				assignLvalue(arg, arg);
1122 			}
1123 			break;
1124 		case EOpPreIncrement:
1125 			if(visit == PostVisit)
1126 			{
1127 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
1128 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1129 				{
1130 					emit(addOpcode, result, index, arg, index, &one);
1131 				}
1132 
1133 				assignLvalue(arg, result);
1134 			}
1135 			break;
1136 		case EOpPreDecrement:
1137 			if(visit == PostVisit)
1138 			{
1139 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
1140 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1141 				{
1142 					emit(subOpcode, result, index, arg, index, &one);
1143 				}
1144 
1145 				assignLvalue(arg, result);
1146 			}
1147 			break;
1148 		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
1149 		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
1150 		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
1151 		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
1152 		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
1153 		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
1154 		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
1155 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
1156 		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
1157 		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
1158 		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
1159 		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
1160 		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
1161 		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
1162 		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
1163 		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
1164 		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
1165 		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
1166 		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
1167 		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
1168 		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
1169 		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
1170 		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
1171 		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
1172 		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
1173 		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
1174 		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;
1175 		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
1176 		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
1177 		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
1178 		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
1179 		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
1180 		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
1181 		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
1182 		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
1183 		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
1184 		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
1185 		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
1186 		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
1187 		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
1188 		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
1189 		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
1190 		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
1191 		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
1192 		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
1193 		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
1194 		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
1195 		case EOpTranspose:
1196 			if(visit == PostVisit)
1197 			{
1198 				int numCols = arg->getNominalSize();
1199 				int numRows = arg->getSecondarySize();
1200 				for(int i = 0; i < numCols; ++i)
1201 				{
1202 					for(int j = 0; j < numRows; ++j)
1203 					{
1204 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
1205 						mov->src[0].swizzle = 0x55 * j;
1206 						mov->dst.mask = 1 << i;
1207 					}
1208 				}
1209 			}
1210 			break;
1211 		case EOpDeterminant:
1212 			if(visit == PostVisit)
1213 			{
1214 				int size = arg->getNominalSize();
1215 				ASSERT(size == arg->getSecondarySize());
1216 
1217 				emitDeterminant(result, arg, size);
1218 			}
1219 			break;
1220 		case EOpInverse:
1221 			if(visit == PostVisit)
1222 			{
1223 				int size = arg->getNominalSize();
1224 				ASSERT(size == arg->getSecondarySize());
1225 
1226 				// Compute transposed matrix of cofactors
1227 				for(int i = 0; i < size; ++i)
1228 				{
1229 					for(int j = 0; j < size; ++j)
1230 					{
1231 						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1232 						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
1233 						emitDeterminant(result, arg, size - 1, j, i, i, j);
1234 					}
1235 				}
1236 
1237 				// Compute 1 / determinant
1238 				Temporary invDet(this);
1239 				emitDeterminant(&invDet, arg, size);
1240 				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
1241 				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
1242 				div->src[1].swizzle = 0x00; // xxxx
1243 
1244 				// Divide transposed matrix of cofactors by determinant
1245 				for(int i = 0; i < size; ++i)
1246 				{
1247 					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
1248 				}
1249 			}
1250 			break;
1251 		default: UNREACHABLE(node->getOp());
1252 		}
1253 
1254 		return true;
1255 	}
1256 
	// Emits the shader instructions for an aggregate node: function definitions
	// and calls, constructors, and built-in operations that take their operands
	// as a sequence of arguments.
	//
	// Returns false without emitting when the traversal is not in the scope being
	// emitted, except for EOpFunction and EOpSequence nodes which are always
	// processed. Also returns false after reporting an error when a user-defined
	// function call cannot be matched to a recorded function. Returns true
	// otherwise.
	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
	{
		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
		{
			return false;
		}

		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);

		// The node itself serves as the destination of the emitted instructions;
		// 'arg' is the list of child nodes (operands / call arguments).
		TIntermTyped *result = node;
		const TType &resultType = node->getType();
		TIntermSequence &arg = node->getSequence();
		size_t argumentCount = arg.size();

		switch(node->getOp())
		{
		case EOpSequence:             break;
		case EOpDeclaration:          break;
		case EOpInvariantDeclaration: break;
		case EOpPrototype:            break;
		case EOpComma:
			if(visit == PostVisit)
			{
				// The value of a comma expression is that of its second operand
				copy(result, arg[1]);
			}
			break;
		case EOpFunction:
			if(visit == PreVisit)
			{
				const TString &name = node->getName();

				if(emitScope == FUNCTION)
				{
					if(functionArray.size() > 1)   // No need for a label when there's only main()
					{
						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
						label->dst.type = sw::Shader::PARAMETER_LABEL;

						const Function *function = findFunction(name);
						ASSERT(function);   // Should have been added during global pass
						label->dst.index = function->label;
						currentFunction = function->label;
					}
				}
				else if(emitScope == GLOBAL)
				{
					// Record every function other than main() so that calls can be
					// resolved to a label and parameter list later on.
					if(name != "main(")
					{
						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
					}
				}
				else UNREACHABLE(emitScope);

				currentScope = FUNCTION;
			}
			else if(visit == PostVisit)
			{
				if(emitScope == FUNCTION)
				{
					if(functionArray.size() > 1)   // No need to return when there's only main()
					{
						emit(sw::Shader::OPCODE_RET);
					}
				}

				currentScope = GLOBAL;
			}
			break;
		case EOpFunctionCall:
			if(visit == PostVisit)
			{
				if(node->isUserDefined())
				{
					const TString &name = node->getName();
					const Function *function = findFunction(name);

					if(!function)
					{
						mContext.error(node->getLine(), "function definition not found", name.c_str());
						return false;
					}

					TIntermSequence &arguments = *function->arg;

					// Copy the call's arguments into the function's input parameters
					for(size_t i = 0; i < argumentCount; i++)
					{
						TIntermTyped *in = arguments[i]->getAsTyped();

						if(in->getQualifier() == EvqIn ||
						   in->getQualifier() == EvqInOut ||
						   in->getQualifier() == EvqConstReadOnly)
						{
							copy(in, arg[i]);
						}
					}

					Instruction *call = emit(sw::Shader::OPCODE_CALL);
					call->dst.type = sw::Shader::PARAMETER_LABEL;
					call->dst.index = function->label;

					// Fetch the return value, if the function produces one
					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
					{
						copy(result, function->ret);
					}

					// Write output parameters back to the lvalues passed by the caller
					for(size_t i = 0; i < argumentCount; i++)
					{
						TIntermTyped *argument = arguments[i]->getAsTyped();
						TIntermTyped *out = arg[i]->getAsTyped();

						if(argument->getQualifier() == EvqOut ||
						   argument->getQualifier() == EvqInOut)
						{
							assignLvalue(out, argument);
						}
					}
				}
				else
				{
					// Calls that are not user-defined are handled as texture functions.
					// The TextureFunction built from the call's name supplies the
					// proj / offset / method properties used below.
					// s is the first argument (the sampler), t the second.
					const TextureFunction textureFunction(node->getName());
					TIntermTyped *s = arg[0]->getAsTyped();
					TIntermTyped *t = arg[1]->getAsTyped();

					Temporary coord(this);

					if(textureFunction.proj)
					{
						// Projective lookup: multiply the low three components of the
						// second argument by the reciprocal of its last component.
						Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
						rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
						rcp->dst.mask = 0x7;

						Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
						mul->dst.mask = 0x7;

						if(IsShadowSampler(s->getBasicType()))
						{
							ASSERT(s->getBasicType() == EbtSampler2DShadow);
							Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, &coord);
							mov->src[0].swizzle = 0xA4;   // xyzz: replicate z into w
						}
					}
					else
					{
						Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);

						if(IsShadowSampler(s->getBasicType()) && t->getNominalSize() == 3)
						{
							ASSERT(s->getBasicType() == EbtSampler2DShadow);
							mov->src[0].swizzle = 0xA4;   // xyzz: replicate z into w
						}
					}

					// Select the sampling instruction from the lookup method, whether an
					// offset is present, and the number of arguments supplied.
					switch(textureFunction.method)
					{
					case TextureFunction::IMPLICIT:
						if(!textureFunction.offset)
						{
							if(argumentCount == 2)
							{
								emit(sw::Shader::OPCODE_TEX, result, &coord, s);
							}
							else if(argumentCount == 3)   // Bias
							{
								emit(sw::Shader::OPCODE_TEXBIAS, result, &coord, s, arg[2]);
							}
							else UNREACHABLE(argumentCount);
						}
						else   // Offset
						{
							if(argumentCount == 3)
							{
								emit(sw::Shader::OPCODE_TEXOFFSET, result, &coord, s, arg[2]);
							}
							else if(argumentCount == 4)   // Bias
							{
								emit(sw::Shader::OPCODE_TEXOFFSETBIAS, result, &coord, s, arg[2], arg[3]);
							}
							else UNREACHABLE(argumentCount);
						}
						break;
					case TextureFunction::LOD:
						if(!textureFunction.offset && argumentCount == 3)
						{
							emit(sw::Shader::OPCODE_TEXLOD, result, &coord, s, arg[2]);
						}
						else if(argumentCount == 4)   // Offset
						{
							// Note the operand order: the fourth argument precedes the third
							emit(sw::Shader::OPCODE_TEXLODOFFSET, result, &coord, s, arg[3], arg[2]);
						}
						else UNREACHABLE(argumentCount);
						break;
					case TextureFunction::FETCH:
						if(!textureFunction.offset && argumentCount == 3)
						{
							emit(sw::Shader::OPCODE_TEXELFETCH, result, &coord, s, arg[2]);
						}
						else if(argumentCount == 4)   // Offset
						{
							// Note the operand order: the fourth argument precedes the third
							emit(sw::Shader::OPCODE_TEXELFETCHOFFSET, result, &coord, s, arg[3], arg[2]);
						}
						else UNREACHABLE(argumentCount);
						break;
					case TextureFunction::GRAD:
						if(!textureFunction.offset && argumentCount == 4)
						{
							emit(sw::Shader::OPCODE_TEXGRAD, result, &coord, s, arg[2], arg[3]);
						}
						else if(argumentCount == 5)   // Offset
						{
							emit(sw::Shader::OPCODE_TEXGRADOFFSET, result, &coord, s, arg[2], arg[3], arg[4]);
						}
						else UNREACHABLE(argumentCount);
						break;
					case TextureFunction::SIZE:
						// Uses the second argument directly rather than the 'coord' temporary
						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], s);
						break;
					default:
						UNREACHABLE(textureFunction.method);
					}
				}
			}
			break;
		case EOpParameters:
			break;
		case EOpConstructFloat:
		case EOpConstructVec2:
		case EOpConstructVec3:
		case EOpConstructVec4:
		case EOpConstructBool:
		case EOpConstructBVec2:
		case EOpConstructBVec3:
		case EOpConstructBVec4:
		case EOpConstructInt:
		case EOpConstructIVec2:
		case EOpConstructIVec3:
		case EOpConstructIVec4:
		case EOpConstructUInt:
		case EOpConstructUVec2:
		case EOpConstructUVec3:
		case EOpConstructUVec4:
			if(visit == PostVisit)
			{
				// 'component' counts the components consumed so far across all
				// arguments; it determines the destination register (array element)
				// and the component offset within it for each argument.
				int component = 0;
				int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0;
				int arrayComponents = result->getType().getElementSize();
				for(size_t i = 0; i < argumentCount; i++)
				{
					TIntermTyped *argi = arg[i]->getAsTyped();
					int size = argi->getNominalSize();
					int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex);
					int swizzle = component - (arrayIndex * arrayComponents);

					if(!argi->isMatrix())
					{
						// Write starting at component 'swizzle'; the source selection is
						// shifted by the same number of components (two bits each).
						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
						mov->dst.mask = (0xF << swizzle) & 0xF;
						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);

						component += size;
					}
					else if(!result->isMatrix()) // Construct a non matrix from a matrix
					{
						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
						mov->dst.mask = (0xF << swizzle) & 0xF;
						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);

						// At most one more instruction when constructing a vec3 from a mat2 or a vec4 from a mat2/mat3
						if(result->getNominalSize() > size)
						{
							Instruction *mov = emitCast(result, arrayIndex, argi, 1);
							mov->dst.mask = (0xF << (swizzle + size)) & 0xF;
							// mat2: xxxy (0x40), mat3: xxxx (0x00)
							mov->src[0].swizzle = ((size == 2) ? 0x40 : 0x00) << (swizzle * 2);
						}

						component += size;
					}
					else   // Matrix
					{
						int column = 0;

						// Consume successive columns of the argument until the result is filled
						while(component < resultType.getNominalSize())
						{
							Instruction *mov = emitCast(result, arrayIndex, argi, column);
							mov->dst.mask = (0xF << swizzle) & 0xF;
							mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);

							column++;
							component += size;
						}
					}
				}
			}
			break;
		case EOpConstructMat2:
		case EOpConstructMat2x3:
		case EOpConstructMat2x4:
		case EOpConstructMat3x2:
		case EOpConstructMat3:
		case EOpConstructMat3x4:
		case EOpConstructMat4x2:
		case EOpConstructMat4x3:
		case EOpConstructMat4:
			if(visit == PostVisit)
			{
				TIntermTyped *arg0 = arg[0]->getAsTyped();
				const int outCols = result->getNominalSize();
				const int outRows = result->getSecondarySize();

				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
				{
					for(int i = 0; i < outCols; i++)
					{
						// Clear the whole column first
						emit(sw::Shader::OPCODE_MOV, result, i, &zero);
						if (i < outRows)
						{
							// Insert the scalar value on the main diagonal.
							// For non-square matrices, Avoid emitting in
							// a column which doesn't /have/ a main diagonal
							// element, even though it would be fairly benign --
							// it's not necessarily trivial for downstream
							// passes to see that this is redundant and strip it
							// out.
							Instruction *mov = emitCast(result, i, arg0, 0);
							mov->dst.mask = 1 << i;
							ASSERT(mov->src[0].swizzle == 0x00);
						}
					}
				}
				else if(arg0->isMatrix())
				{
					// Matrix from matrix; for an array result, argument n supplies
					// array element n, which occupies outCols consecutive registers.
					int arraySize = result->isArray() ? result->getArraySize() : 1;

					for(int n = 0; n < arraySize; n++)
					{
						TIntermTyped *argi = arg[n]->getAsTyped();
						const int inCols = argi->getNominalSize();
						const int inRows = argi->getSecondarySize();

						for(int i = 0; i < outCols; i++)
						{
							// Columns or rows that the source does not provide keep
							// the identity value written here.
							if(i >= inCols || outRows > inRows)
							{
								// Initialize to identity matrix
								Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
								emitCast(result, i + n * outCols, &col, 0);
							}

							if(i < inCols)
							{
								// Overwrite only the rows present in the source column
								Instruction *mov = emitCast(result, i + n * outCols, argi, i);
								mov->dst.mask = 0xF >> (4 - inRows);
							}
						}
					}
				}
				else
				{
					// Matrix from a list of scalars/vectors: fill the result column by
					// column. (column, row) is the next position to write.
					int column = 0;
					int row = 0;

					for(size_t i = 0; i < argumentCount; i++)
					{
						TIntermTyped *argi = arg[i]->getAsTyped();
						int size = argi->getNominalSize();
						int element = 0;

						// An argument may straddle a column boundary; each iteration
						// writes the part of it that fits in the current column.
						while(element < size)
						{
							Instruction *mov = emitCast(result, column, argi, 0);
							mov->dst.mask = (0xF << row) & 0xF;
							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;

							int end = row + size - element;
							column = end >= outRows ? column + 1 : column;
							element = element + outRows - row;
							row = end >= outRows ? 0 : end;
						}
					}
				}
			}
			break;
		case EOpConstructStruct:
			if(visit == PostVisit)
			{
				// Copy each argument's registers into consecutive registers of the result
				int offset = 0;
				for(size_t i = 0; i < argumentCount; i++)
				{
					TIntermTyped *argi = arg[i]->getAsTyped();
					int size = argi->totalRegisterCount();

					for(int index = 0; index < size; index++)
					{
						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
						mov->dst.mask = writeMask(result, offset + index);
					}

					offset += size;
				}
			}
			break;
		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
		case EOpModf:
			if(visit == PostVisit)
			{
				// The second argument receives trunc(x); the result is x - trunc(x)
				TIntermTyped* arg1 = arg[1]->getAsTyped();
				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
				assignLvalue(arg1, arg1);
				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
			}
			break;
		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
		case EOpClamp:
			if(visit == PostVisit)
			{
				// clamp(x, lo, hi) is emitted as min(max(x, lo), hi)
				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
			}
			break;
		case EOpMix:
			if(visit == PostVisit)
			{
				// A boolean third argument selects between the operands instead of blending them
				if(arg[2]->getAsTyped()->getBasicType() == EbtBool)
				{
					emit(sw::Shader::OPCODE_SELECT, result, arg[2], arg[1], arg[0]);
				}
				else
				{
					emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]);
				}
			}
			break;
		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
		case EOpMul:
			if(visit == PostVisit)
			{
				// Component-wise product of two equally sized matrices, one register at a time
				TIntermTyped *arg0 = arg[0]->getAsTyped();
				ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) &&
				       (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize()));

				int size = arg0->getNominalSize();
				for(int i = 0; i < size; i++)
				{
					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
				}
			}
			break;
		case EOpOuterProduct:
			if(visit == PostVisit)
			{
				// Result register i is the first argument scaled by component i of the second
				for(int i = 0; i < dim(arg[1]); i++)
				{
					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
					mul->src[1].swizzle = 0x55 * i;
				}
			}
			break;
		default: UNREACHABLE(node->getOp());
		}

		return true;
	}
1736 
visitSelection(Visit visit,TIntermSelection * node)1737 	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
1738 	{
1739 		if(currentScope != emitScope)
1740 		{
1741 			return false;
1742 		}
1743 
1744 		TIntermTyped *condition = node->getCondition();
1745 		TIntermNode *trueBlock = node->getTrueBlock();
1746 		TIntermNode *falseBlock = node->getFalseBlock();
1747 		TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
1748 
1749 		condition->traverse(this);
1750 
1751 		if(node->usesTernaryOperator())
1752 		{
1753 			if(constantCondition)
1754 			{
1755 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1756 
1757 				if(trueCondition)
1758 				{
1759 					trueBlock->traverse(this);
1760 					copy(node, trueBlock);
1761 				}
1762 				else
1763 				{
1764 					falseBlock->traverse(this);
1765 					copy(node, falseBlock);
1766 				}
1767 			}
1768 			else if(trivial(node, 6))   // Fast to compute both potential results and no side effects
1769 			{
1770 				trueBlock->traverse(this);
1771 				falseBlock->traverse(this);
1772 				emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);
1773 			}
1774 			else
1775 			{
1776 				emit(sw::Shader::OPCODE_IF, 0, condition);
1777 
1778 				if(trueBlock)
1779 				{
1780 					trueBlock->traverse(this);
1781 					copy(node, trueBlock);
1782 				}
1783 
1784 				if(falseBlock)
1785 				{
1786 					emit(sw::Shader::OPCODE_ELSE);
1787 					falseBlock->traverse(this);
1788 					copy(node, falseBlock);
1789 				}
1790 
1791 				emit(sw::Shader::OPCODE_ENDIF);
1792 			}
1793 		}
1794 		else  // if/else statement
1795 		{
1796 			if(constantCondition)
1797 			{
1798 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1799 
1800 				if(trueCondition)
1801 				{
1802 					if(trueBlock)
1803 					{
1804 						trueBlock->traverse(this);
1805 					}
1806 				}
1807 				else
1808 				{
1809 					if(falseBlock)
1810 					{
1811 						falseBlock->traverse(this);
1812 					}
1813 				}
1814 			}
1815 			else
1816 			{
1817 				emit(sw::Shader::OPCODE_IF, 0, condition);
1818 
1819 				if(trueBlock)
1820 				{
1821 					trueBlock->traverse(this);
1822 				}
1823 
1824 				if(falseBlock)
1825 				{
1826 					emit(sw::Shader::OPCODE_ELSE);
1827 					falseBlock->traverse(this);
1828 				}
1829 
1830 				emit(sw::Shader::OPCODE_ENDIF);
1831 			}
1832 		}
1833 
1834 		return false;
1835 	}
1836 
	// Emits code for a loop node. Do-while loops are lowered to a WHILE loop driven by a
	// temporary flag, loops with at most 4 iterations (as reported by LoopInfo) are unrolled,
	// and everything else becomes a WHILE / TEST / ENDWHILE construct.
	// Always returns false; the loop's children are traversed explicitly here.
	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
	{
		if(currentScope != emitScope)
		{
			return false;
		}

		LoopInfo loop(node);

		// Nothing is emitted (not even the init statement) when LoopInfo reports zero iterations.
		if(loop.iterations == 0)
		{
			return false;
		}

		bool unroll = (loop.iterations <= 4);

		TIntermNode *init = node->getInit();
		TIntermTyped *condition = node->getCondition();
		TIntermTyped *expression = node->getExpression();
		TIntermNode *body = node->getBody();
		Constant True(true);

		if(loop.isDeterministic())
		{
			 // Register the loop index for the duration of the loop; lvalue() consults this set
			 // when deciding whether an indirect index is dynamic.
			 deterministicVariables.insert(loop.index->getId());

			 if(!unroll)
			 {
				 emit(sw::Shader::OPCODE_SCALAR);   // Unrolled loops don't have an ENDWHILE to disable scalar mode.
			 }
		}

		if(node->getType() == ELoopDoWhile)
		{
			// The flag starts out true so the body runs at least once; it is then refreshed
			// from the real condition at the end of every iteration.
			Temporary iterate(this);
			emit(sw::Shader::OPCODE_MOV, &iterate, &True);

			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while

			if(body)
			{
				body->traverse(this);
			}

			emit(sw::Shader::OPCODE_TEST);

			// NOTE(review): condition is dereferenced without a null check on this path,
			// unlike the for/while path below.
			condition->traverse(this);
			emit(sw::Shader::OPCODE_MOV, &iterate, condition);

			emit(sw::Shader::OPCODE_ENDWHILE);
		}
		else
		{
			if(init)
			{
				init->traverse(this);
			}

			if(unroll)
			{
				mContext.info(node->getLine(), "loop unrolled", "for");

				// Repeat body + increment expression once per iteration; no loop instructions are emitted.
				for(unsigned int i = 0; i < loop.iterations; i++)
				{
				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop

					if(body)
					{
						body->traverse(this);
					}

					if(expression)
					{
						expression->traverse(this);
					}
				}
			}
			else
			{
				if(condition)
				{
					condition->traverse(this);
				}
				else
				{
					// A missing condition is treated as an always-true condition.
					condition = &True;
				}

				emit(sw::Shader::OPCODE_WHILE, 0, condition);

				if(body)
				{
					body->traverse(this);
				}

				emit(sw::Shader::OPCODE_TEST);

				if(loop.isDeterministic())
				{
					emit(sw::Shader::OPCODE_SCALAR);
				}

				if(expression)
				{
					expression->traverse(this);
				}

				// Re-evaluate the condition for the next iteration. condition is never null here
				// because it was replaced by &True above when absent.
				if(condition)
				{
					condition->traverse(this);
				}

				emit(sw::Shader::OPCODE_ENDWHILE);
			}
		}

		if(loop.isDeterministic())
		{
			 // The loop index is only registered while code for this loop is being emitted.
			 deterministicVariables.erase(loop.index->getId());
		}

		return false;
	}
1960 
visitBranch(Visit visit,TIntermBranch * node)1961 	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
1962 	{
1963 		if(currentScope != emitScope)
1964 		{
1965 			return false;
1966 		}
1967 
1968 		switch(node->getFlowOp())
1969 		{
1970 		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
1971 		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
1972 		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
1973 		case EOpReturn:
1974 			if(visit == PostVisit)
1975 			{
1976 				TIntermTyped *value = node->getExpression();
1977 
1978 				if(value)
1979 				{
1980 					copy(functionArray[currentFunction].ret, value);
1981 				}
1982 
1983 				emit(sw::Shader::OPCODE_LEAVE);
1984 			}
1985 			break;
1986 		default: UNREACHABLE(node->getFlowOp());
1987 		}
1988 
1989 		return true;
1990 	}
1991 
visitSwitch(Visit visit,TIntermSwitch * node)1992 	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
1993 	{
1994 		if(currentScope != emitScope)
1995 		{
1996 			return false;
1997 		}
1998 
1999 		TIntermTyped* switchValue = node->getInit();
2000 		TIntermAggregate* opList = node->getStatementList();
2001 
2002 		if(!switchValue || !opList)
2003 		{
2004 			return false;
2005 		}
2006 
2007 		switchValue->traverse(this);
2008 
2009 		emit(sw::Shader::OPCODE_SWITCH);
2010 
2011 		TIntermSequence& sequence = opList->getSequence();
2012 		TIntermSequence::iterator it = sequence.begin();
2013 		TIntermSequence::iterator defaultIt = sequence.end();
2014 		int nbCases = 0;
2015 		for(; it != sequence.end(); ++it)
2016 		{
2017 			TIntermCase* currentCase = (*it)->getAsCaseNode();
2018 			if(currentCase)
2019 			{
2020 				TIntermSequence::iterator caseIt = it;
2021 
2022 				TIntermTyped* condition = currentCase->getCondition();
2023 				if(condition) // non default case
2024 				{
2025 					if(nbCases != 0)
2026 					{
2027 						emit(sw::Shader::OPCODE_ELSE);
2028 					}
2029 
2030 					condition->traverse(this);
2031 					Temporary result(this);
2032 					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
2033 					emit(sw::Shader::OPCODE_IF, 0, &result);
2034 					nbCases++;
2035 
2036 					// Emit the code for this case and all subsequent cases until we hit a break statement.
2037 					// TODO: This can repeat a lot of code for switches with many fall-through cases.
2038 					for(++caseIt; caseIt != sequence.end(); ++caseIt)
2039 					{
2040 						(*caseIt)->traverse(this);
2041 
2042 						// Stop if we encounter an unconditional branch (break, continue, return, or kill).
2043 						// TODO: This doesn't work if the statement is at a deeper scope level (e.g. {break;}).
2044 						// Note that this eliminates useless operations but shouldn't affect correctness.
2045 						if((*caseIt)->getAsBranchNode())
2046 						{
2047 							break;
2048 						}
2049 					}
2050 				}
2051 				else
2052 				{
2053 					defaultIt = it; // The default case might not be the last case, keep it for last
2054 				}
2055 			}
2056 		}
2057 
2058 		// If there's a default case, traverse it here
2059 		if(defaultIt != sequence.end())
2060 		{
2061 			emit(sw::Shader::OPCODE_ELSE);
2062 			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
2063 			{
2064 				(*defaultIt)->traverse(this);
2065 				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
2066 				{
2067 					break;
2068 				}
2069 			}
2070 		}
2071 
2072 		for(int i = 0; i < nbCases; ++i)
2073 		{
2074 			emit(sw::Shader::OPCODE_ENDIF);
2075 		}
2076 
2077 		emit(sw::Shader::OPCODE_ENDSWITCH);
2078 
2079 		return false;
2080 	}
2081 
	// Convenience overload: emits 'op' with the destination and every source operand at
	// register offset 0. Forwards to the indexed emit() overload and returns its result.
	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
	{
		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
	}
2086 
emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)2087 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
2088 	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
2089 	{
2090 		Instruction *instruction = new Instruction(op);
2091 
2092 		if(dst)
2093 		{
2094 			destination(instruction->dst, dst, dstIndex);
2095 		}
2096 
2097 		if(src0)
2098 		{
2099 			TIntermTyped* src = src0->getAsTyped();
2100 			instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow);
2101 		}
2102 
2103 		source(instruction->src[0], src0, index0);
2104 		source(instruction->src[1], src1, index1);
2105 		source(instruction->src[2], src2, index2);
2106 		source(instruction->src[3], src3, index3);
2107 		source(instruction->src[4], src4, index4);
2108 
2109 		shader->append(instruction);
2110 
2111 		return instruction;
2112 	}
2113 
	// Convenience overload: converts register 0 of 'src' into register 0 of 'dst'.
	// Forwards to the indexed emitCast() overload and returns its result.
	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
	{
		return emitCast(dst, 0, src, 0);
	}
2118 
emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)2119 	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
2120 	{
2121 		switch(src->getBasicType())
2122 		{
2123 		case EbtBool:
2124 			switch(dst->getBasicType())
2125 			{
2126 			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
2127 			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
2128 			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
2129 			default:       break;
2130 			}
2131 			break;
2132 		case EbtInt:
2133 			switch(dst->getBasicType())
2134 			{
2135 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
2136 			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
2137 			default:       break;
2138 			}
2139 			break;
2140 		case EbtUInt:
2141 			switch(dst->getBasicType())
2142 			{
2143 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
2144 			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
2145 			default:       break;
2146 			}
2147 			break;
2148 		case EbtFloat:
2149 			switch(dst->getBasicType())
2150 			{
2151 			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
2152 			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
2153 			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
2154 			default:      break;
2155 			}
2156 			break;
2157 		default:
2158 			break;
2159 		}
2160 
2161 		ASSERT((src->getBasicType() == dst->getBasicType()) ||
2162 		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
2163 		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
2164 
2165 		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
2166 	}
2167 
emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)2168 	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
2169 	{
2170 		for(int index = 0; index < dst->elementRegisterCount(); index++)
2171 		{
2172 			emit(op, dst, index, src0, index, src1, index, src2, index);
2173 		}
2174 	}
2175 
	// Emits a compound assignment: computes 'op' on src0 and src1 into 'result' with
	// emitBinary(), then stores 'result' into the l-value 'lhs' with assignLvalue().
	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
	{
		emitBinary(op, result, src0, src1);
		assignLvalue(lhs, result);
	}
2181 
emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)2182 	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
2183 	{
2184 		sw::Shader::Opcode opcode;
2185 		switch(left->getAsTyped()->getBasicType())
2186 		{
2187 		case EbtBool:
2188 		case EbtInt:
2189 			opcode = sw::Shader::OPCODE_ICMP;
2190 			break;
2191 		case EbtUInt:
2192 			opcode = sw::Shader::OPCODE_UCMP;
2193 			break;
2194 		default:
2195 			opcode = sw::Shader::OPCODE_CMP;
2196 			break;
2197 		}
2198 
2199 		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
2200 		cmp->control = cmpOp;
2201 	}
2202 
componentCount(const TType & type,int registers)2203 	int componentCount(const TType &type, int registers)
2204 	{
2205 		if(registers == 0)
2206 		{
2207 			return 0;
2208 		}
2209 
2210 		if(type.isArray() && registers >= type.elementRegisterCount())
2211 		{
2212 			int index = registers / type.elementRegisterCount();
2213 			registers -= index * type.elementRegisterCount();
2214 			return index * type.getElementSize() + componentCount(type, registers);
2215 		}
2216 
2217 		if(type.isStruct() || type.isInterfaceBlock())
2218 		{
2219 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2220 			int elements = 0;
2221 
2222 			for(const auto &field : fields)
2223 			{
2224 				const TType &fieldType = *(field->type());
2225 
2226 				if(fieldType.totalRegisterCount() <= registers)
2227 				{
2228 					registers -= fieldType.totalRegisterCount();
2229 					elements += fieldType.getObjectSize();
2230 				}
2231 				else   // Register within this field
2232 				{
2233 					return elements + componentCount(fieldType, registers);
2234 				}
2235 			}
2236 		}
2237 		else if(type.isMatrix())
2238 		{
2239 			return registers * type.registerSize();
2240 		}
2241 
2242 		UNREACHABLE(0);
2243 		return 0;
2244 	}
2245 
registerSize(const TType & type,int registers)2246 	int registerSize(const TType &type, int registers)
2247 	{
2248 		if(registers == 0)
2249 		{
2250 			if(type.isStruct())
2251 			{
2252 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
2253 			}
2254 			else if(type.isInterfaceBlock())
2255 			{
2256 				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
2257 			}
2258 
2259 			return type.registerSize();
2260 		}
2261 
2262 		if(type.isArray() && registers >= type.elementRegisterCount())
2263 		{
2264 			int index = registers / type.elementRegisterCount();
2265 			registers -= index * type.elementRegisterCount();
2266 			return registerSize(type, registers);
2267 		}
2268 
2269 		if(type.isStruct() || type.isInterfaceBlock())
2270 		{
2271 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2272 			int elements = 0;
2273 
2274 			for(const auto &field : fields)
2275 			{
2276 				const TType &fieldType = *(field->type());
2277 
2278 				if(fieldType.totalRegisterCount() <= registers)
2279 				{
2280 					registers -= fieldType.totalRegisterCount();
2281 					elements += fieldType.getObjectSize();
2282 				}
2283 				else   // Register within this field
2284 				{
2285 					return registerSize(fieldType, registers);
2286 				}
2287 			}
2288 		}
2289 		else if(type.isMatrix())
2290 		{
2291 			return registerSize(type, 0);
2292 		}
2293 
2294 		UNREACHABLE(0);
2295 		return 0;
2296 	}
2297 
getBlockId(TIntermTyped * arg)2298 	int OutputASM::getBlockId(TIntermTyped *arg)
2299 	{
2300 		if(arg)
2301 		{
2302 			const TType &type = arg->getType();
2303 			TInterfaceBlock* block = type.getInterfaceBlock();
2304 			if(block && (type.getQualifier() == EvqUniform))
2305 			{
2306 				// Make sure the uniform block is declared
2307 				uniformRegister(arg);
2308 
2309 				const char* blockName = block->name().c_str();
2310 
2311 				// Fetch uniform block index from array of blocks
2312 				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
2313 				{
2314 					if(blockName == it->name)
2315 					{
2316 						return it->blockId;
2317 					}
2318 				}
2319 
2320 				ASSERT(false);
2321 			}
2322 		}
2323 
2324 		return -1;
2325 	}
2326 
	// Resolves register 'index' of 'arg' to an ArgumentInfo. For an argument inside a uniform
	// block this finds the buffer index, the block member that contains the register and the
	// register index relative to that member; otherwise only the index is clamped to the
	// argument's register range.
	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
	{
		const TType &type = arg->getType();
		int blockId = getBlockId(arg);
		// NOTE(review): the two -1 arguments presumably initialize clampedIndex and bufferIndex;
		// confirm against ArgumentInfo's constructor.
		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
		if(blockId != -1)
		{
			// The buffer index of this block is the number of buffers used by all preceding
			// blocks, where a block with arraySize <= 0 counts as one buffer.
			argumentInfo.bufferIndex = 0;
			for(int i = 0; i < blockId; ++i)
			{
				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
			}

			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];

			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
			BlockDefinitionIndexMap::const_iterator it = itEnd;

			argumentInfo.clampedIndex = index;
			if(type.isInterfaceBlock())
			{
				// Offset index to the beginning of the selected instance
				int blockRegisters = type.elementRegisterCount();
				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
				argumentInfo.bufferIndex += bufferOffset;
				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
			}

			// Search downwards from the addressed register for the closest register that has an
			// entry in the block definition, then make clampedIndex relative to that entry.
			int regIndex = registerIndex(arg);
			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
			{
				it = blockDefinition.find(i);
				if(it != itEnd)
				{
					argumentInfo.clampedIndex -= (i - regIndex);
					break;
				}
			}
			// NOTE(review): if no entry is found, 'it' is the end iterator and is dereferenced
			// below; only this ASSERT guards against that.
			ASSERT(it != itEnd);

			argumentInfo.typedMemberInfo = it->second;

			// Clamp to the last register of the member.
			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
		}
		else
		{
			// Not in a uniform block: clamp to the last register of the argument itself.
			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
		}

		return argumentInfo;
	}
2380 
	// Fills in a source parameter so that it refers to register 'index' of 'argument'.
	// Does nothing when 'argument' is null. Bool members and row-major matrix members of
	// uniform blocks are first unpacked into a temporary by extra instructions appended to the
	// shader, and the parameter then refers to that temporary. Constants are stored by value
	// in the parameter; everything else is referenced by register index.
	void OutputASM::source(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
	{
		if(argument)
		{
			TIntermTyped *arg = argument->getAsTyped();
			// Receives the unpacked value when the argument needs conversion (see below).
			Temporary unpackedUniform(this);

			const TType& srcType = arg->getType();
			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
			if(srcBlock && (srcType.getQualifier() == EvqUniform))
			{
				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
				const TType &memberType = argumentInfo.typedMemberInfo.type;

				if(memberType.getBasicType() == EbtBool)
				{
					ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize

					// Convert the packed bool, which is currently an int, to a true bool
					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
					instruction->dst.index = registerIndex(&unpackedUniform);
					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;

					shader->append(instruction);

					// From here on, read from the temporary instead of the uniform buffer.
					arg = &unpackedUniform;
					index = 0;
				}
				else if((memberType.getLayoutQualifier().matrixPacking == EmpRowMajor) && memberType.isMatrix())
				{
					int numCols = memberType.getNominalSize();
					int numRows = memberType.getSecondarySize();

					ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize

					// clampedIndex addresses a column: its remainder modulo numCols selects the
					// component to read from every row, its quotient selects the array element.
					unsigned int dstIndex = registerIndex(&unpackedUniform);
					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;   // 0x55 * n replicates component n
					int arrayIndex = argumentInfo.clampedIndex / numCols;
					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;

					for(int j = 0; j < numRows; ++j)
					{
						// Transpose the row major matrix
						// Row j supplies component j of the requested column.
						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
						instruction->dst.index = dstIndex;
						instruction->dst.mask = 1 << j;
						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
						instruction->src[0].swizzle = srcSwizzle;

						shader->append(instruction);
					}

					// From here on, read from the temporary instead of the uniform buffer.
					arg = &unpackedUniform;
					index = 0;
				}
			}

			// Resolve again: 'arg' and 'index' may now refer to the temporary.
			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
			const TType &type = argumentInfo.typedMemberInfo.type;

			int size = registerSize(type, argumentInfo.clampedIndex);

			parameter.type = registerType(arg);
			parameter.bufferIndex = argumentInfo.bufferIndex;

			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
			{
				// Constants are embedded in the parameter: up to 'size' values starting at the
				// register's first component, with the remaining slots set to zero.
				int component = componentCount(type, argumentInfo.clampedIndex);
				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();

				for(int i = 0; i < 4; i++)
				{
					if(size == 1)   // Replicate
					{
						parameter.value[i] = constants[component + 0].getAsFloat();
					}
					else if(i < size)
					{
						parameter.value[i] = constants[component + i].getAsFloat();
					}
					else
					{
						parameter.value[i] = 0.0f;
					}
				}
			}
			else
			{
				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;

				if(parameter.bufferIndex != -1)
				{
					// Uniform block members are addressed by their offset within the buffer,
					// stepping by the matrix stride when there is one and by the array stride otherwise.
					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
				}
			}

			// Samplers keep the parameter's existing swizzle.
			if(!IsSampler(arg->getBasicType()))
			{
				parameter.swizzle = readSwizzle(arg, size);
			}
		}
	}
2490 
	// Fills in a destination parameter so that it refers to register 'index' of 'arg':
	// its register type, its register index (base register of 'arg' plus 'index') and the
	// write mask that writeMask() reports for that register.
	void OutputASM::destination(sw::Shader::DestinationParameter &parameter, TIntermTyped *arg, int index)
	{
		parameter.type = registerType(arg);
		parameter.index = registerIndex(arg) + index;
		parameter.mask = writeMask(arg, index);
	}
2497 
copy(TIntermTyped * dst,TIntermNode * src,int offset)2498 	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
2499 	{
2500 		for(int index = 0; index < dst->totalRegisterCount(); index++)
2501 		{
2502 			Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
2503 		}
2504 	}
2505 
swizzleElement(int swizzle,int index)2506 	int swizzleElement(int swizzle, int index)
2507 	{
2508 		return (swizzle >> (index * 2)) & 0x03;
2509 	}
2510 
swizzleSwizzle(int leftSwizzle,int rightSwizzle)2511 	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
2512 	{
2513 		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
2514 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
2515 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
2516 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
2517 	}
2518 
assignLvalue(TIntermTyped * dst,TIntermTyped * src)2519 	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
2520 	{
2521 		if((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
2522 		   (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize()))))
2523 		{
2524 			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
2525 		}
2526 
2527 		TIntermBinary *binary = dst->getAsBinaryNode();
2528 
2529 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
2530 		{
2531 			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);
2532 
2533 			lvalue(insert->dst, dst);
2534 
2535 			insert->src[0].type = insert->dst.type;
2536 			insert->src[0].index = insert->dst.index;
2537 			insert->src[0].rel = insert->dst.rel;
2538 			source(insert->src[1], src);
2539 			source(insert->src[2], binary->getRight());
2540 
2541 			shader->append(insert);
2542 		}
2543 		else
2544 		{
2545 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
2546 
2547 			int swizzle = lvalue(mov1->dst, dst);
2548 
2549 			source(mov1->src[0], src);
2550 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
2551 
2552 			shader->append(mov1);
2553 
2554 			for(int offset = 1; offset < dst->totalRegisterCount(); offset++)
2555 			{
2556 				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);
2557 
2558 				mov->dst = mov1->dst;
2559 				mov->dst.index += offset;
2560 				mov->dst.mask = writeMask(dst, offset);
2561 
2562 				source(mov->src[0], src, offset);
2563 
2564 				shader->append(mov);
2565 			}
2566 		}
2567 	}
2568 
evaluateRvalue(TIntermTyped * node)2569 	void OutputASM::evaluateRvalue(TIntermTyped *node)
2570 	{
2571 		TIntermBinary *binary = node->getAsBinaryNode();
2572 
2573 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && node->isScalar())
2574 		{
2575 			Instruction *insert = new Instruction(sw::Shader::OPCODE_EXTRACT);
2576 
2577 			destination(insert->dst, node);
2578 
2579 			Temporary address(this);
2580 			unsigned char mask;
2581 			TIntermTyped *root = nullptr;
2582 			unsigned int offset = 0;
2583 			int swizzle = lvalue(root, offset, insert->src[0].rel, mask, address, node);
2584 
2585 			source(insert->src[0], root, offset);
2586 			insert->src[0].swizzle = swizzleSwizzle(insert->src[0].swizzle, swizzle);
2587 
2588 			source(insert->src[1], binary->getRight());
2589 
2590 			shader->append(insert);
2591 		}
2592 		else
2593 		{
2594 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
2595 
2596 			destination(mov1->dst, node, 0);
2597 
2598 			Temporary address(this);
2599 			unsigned char mask;
2600 			TIntermTyped *root = nullptr;
2601 			unsigned int offset = 0;
2602 			int swizzle = lvalue(root, offset, mov1->src[0].rel, mask, address, node);
2603 
2604 			source(mov1->src[0], root, offset);
2605 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
2606 
2607 			shader->append(mov1);
2608 
2609 			for(int i = 1; i < node->totalRegisterCount(); i++)
2610 			{
2611 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, node, i, root, offset + i);
2612 				mov->src[0].rel = mov1->src[0].rel;
2613 			}
2614 		}
2615 	}
2616 
lvalue(sw::Shader::DestinationParameter & dst,TIntermTyped * node)2617 	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, TIntermTyped *node)
2618 	{
2619 		Temporary address(this);
2620 		TIntermTyped *root = nullptr;
2621 		unsigned int offset = 0;
2622 		unsigned char mask = 0xF;
2623 		int swizzle = lvalue(root, offset, dst.rel, mask, address, node);
2624 
2625 		dst.type = registerType(root);
2626 		dst.index = registerIndex(root) + offset;
2627 		dst.mask = mask;
2628 
2629 		return swizzle;
2630 	}
2631 
lvalue(TIntermTyped * & root,unsigned int & offset,sw::Shader::Relative & rel,unsigned char & mask,Temporary & address,TIntermTyped * node)2632 	int OutputASM::lvalue(TIntermTyped *&root, unsigned int &offset, sw::Shader::Relative &rel, unsigned char &mask, Temporary &address, TIntermTyped *node)
2633 	{
2634 		TIntermTyped *result = node;
2635 		TIntermBinary *binary = node->getAsBinaryNode();
2636 		TIntermSymbol *symbol = node->getAsSymbolNode();
2637 
2638 		if(binary)
2639 		{
2640 			TIntermTyped *left = binary->getLeft();
2641 			TIntermTyped *right = binary->getRight();
2642 
2643 			int leftSwizzle = lvalue(root, offset, rel, mask, address, left);   // Resolve the l-value of the left side
2644 
2645 			switch(binary->getOp())
2646 			{
2647 			case EOpIndexDirect:
2648 				{
2649 					int rightIndex = right->getAsConstantUnion()->getIConst(0);
2650 
2651 					if(left->isRegister())
2652 					{
2653 						int leftMask = mask;
2654 
2655 						mask = 1;
2656 						while((leftMask & mask) == 0)
2657 						{
2658 							mask = mask << 1;
2659 						}
2660 
2661 						int element = swizzleElement(leftSwizzle, rightIndex);
2662 						mask = 1 << element;
2663 
2664 						return element;
2665 					}
2666 					else if(left->isArray() || left->isMatrix())
2667 					{
2668 						offset += rightIndex * result->totalRegisterCount();
2669 						return 0xE4;
2670 					}
2671 					else UNREACHABLE(0);
2672 				}
2673 				break;
2674 			case EOpIndexIndirect:
2675 				{
2676 					right->traverse(this);
2677 
2678 					if(left->isRegister())
2679 					{
2680 						// Requires INSERT instruction (handled by calling function)
2681 					}
2682 					else if(left->isArray() || left->isMatrix())
2683 					{
2684 						int scale = result->totalRegisterCount();
2685 
2686 						if(rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
2687 						{
2688 							if(left->totalRegisterCount() > 1)
2689 							{
2690 								sw::Shader::SourceParameter relativeRegister;
2691 								source(relativeRegister, right);
2692 
2693 								int indexId = right->getAsSymbolNode() ? right->getAsSymbolNode()->getId() : 0;
2694 
2695 								rel.index = relativeRegister.index;
2696 								rel.type = relativeRegister.type;
2697 								rel.scale = scale;
2698 								rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0);
2699 							}
2700 						}
2701 						else if(rel.index != registerIndex(&address))   // Move the previous index register to the address register
2702 						{
2703 							if(scale == 1)
2704 							{
2705 								Constant oldScale((int)rel.scale);
2706 								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
2707 								mad->src[0].index = rel.index;
2708 								mad->src[0].type = rel.type;
2709 							}
2710 							else
2711 							{
2712 								Constant oldScale((int)rel.scale);
2713 								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
2714 								mul->src[0].index = rel.index;
2715 								mul->src[0].type = rel.type;
2716 
2717 								Constant newScale(scale);
2718 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2719 							}
2720 
2721 							rel.type = sw::Shader::PARAMETER_TEMP;
2722 							rel.index = registerIndex(&address);
2723 							rel.scale = 1;
2724 						}
2725 						else   // Just add the new index to the address register
2726 						{
2727 							if(scale == 1)
2728 							{
2729 								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
2730 							}
2731 							else
2732 							{
2733 								Constant newScale(scale);
2734 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2735 							}
2736 						}
2737 					}
2738 					else UNREACHABLE(0);
2739 				}
2740 				break;
2741 			case EOpIndexDirectStruct:
2742 			case EOpIndexDirectInterfaceBlock:
2743 				{
2744 					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
2745 					                           left->getType().getStruct()->fields() :
2746 					                           left->getType().getInterfaceBlock()->fields();
2747 					int index = right->getAsConstantUnion()->getIConst(0);
2748 					int fieldOffset = 0;
2749 
2750 					for(int i = 0; i < index; i++)
2751 					{
2752 						fieldOffset += fields[i]->type()->totalRegisterCount();
2753 					}
2754 
2755 					offset += fieldOffset;
2756 					mask = writeMask(result);
2757 
2758 					return 0xE4;
2759 				}
2760 				break;
2761 			case EOpVectorSwizzle:
2762 				{
2763 					ASSERT(left->isRegister());
2764 
2765 					int leftMask = mask;
2766 
2767 					int swizzle = 0;
2768 					int rightMask = 0;
2769 
2770 					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
2771 
2772 					for(unsigned int i = 0; i < sequence.size(); i++)
2773 					{
2774 						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
2775 
2776 						int element = swizzleElement(leftSwizzle, index);
2777 						rightMask = rightMask | (1 << element);
2778 						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
2779 					}
2780 
2781 					mask = leftMask & rightMask;
2782 
2783 					return swizzle;
2784 				}
2785 				break;
2786 			default:
2787 				UNREACHABLE(binary->getOp());   // Not an l-value operator
2788 				break;
2789 			}
2790 		}
2791 		else if(symbol)
2792 		{
2793 			root = symbol;
2794 			offset = 0;
2795 			mask = writeMask(symbol);
2796 
2797 			return 0xE4;
2798 		}
2799 		else
2800 		{
2801 			node->traverse(this);
2802 
2803 			root = node;
2804 			offset = 0;
2805 			mask = writeMask(node);
2806 
2807 			return 0xE4;
2808 		}
2809 
2810 		return 0xE4;
2811 	}
2812 
registerType(TIntermTyped * operand)2813 	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
2814 	{
2815 		if(isSamplerRegister(operand))
2816 		{
2817 			return sw::Shader::PARAMETER_SAMPLER;
2818 		}
2819 
2820 		const TQualifier qualifier = operand->getQualifier();
2821 		if((qualifier == EvqFragColor) || (qualifier == EvqFragData))
2822 		{
2823 			if(((qualifier == EvqFragData) && (outputQualifier == EvqFragColor)) ||
2824 			   ((qualifier == EvqFragColor) && (outputQualifier == EvqFragData)))
2825 			{
2826 				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
2827 			}
2828 			outputQualifier = qualifier;
2829 		}
2830 
2831 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
2832 		{
2833 			// Constant arrays are in the constant register file.
2834 			if(operand->isArray() && operand->getArraySize() > 1)
2835 			{
2836 				return sw::Shader::PARAMETER_CONST;
2837 			}
2838 			else
2839 			{
2840 				return sw::Shader::PARAMETER_TEMP;
2841 			}
2842 		}
2843 
2844 		switch(qualifier)
2845 		{
2846 		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
2847 		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
2848 		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
2849 		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
2850 		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
2851 		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
2852 		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
2853 		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
2854 		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
2855 		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
2856 		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
2857 		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
2858 		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
2859 		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
2860 		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
2861 		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
2862 		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
2863 		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
2864 		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
2865 		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
2866 		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
2867 		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
2868 		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
2869 		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
2870 		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
2871 		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
2872 		case EvqVertexID:            return sw::Shader::PARAMETER_MISCTYPE;
2873 		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
2874 		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
2875 		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
2876 		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
2877 		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
2878 		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
2879 		default: UNREACHABLE(qualifier);
2880 		}
2881 
2882 		return sw::Shader::PARAMETER_VOID;
2883 	}
2884 
hasFlatQualifier(TIntermTyped * operand)2885 	bool OutputASM::hasFlatQualifier(TIntermTyped *operand)
2886 	{
2887 		const TQualifier qualifier = operand->getQualifier();
2888 		return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn;
2889 	}
2890 
registerIndex(TIntermTyped * operand)2891 	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
2892 	{
2893 		if(isSamplerRegister(operand))
2894 		{
2895 			return samplerRegister(operand);
2896 		}
2897 		else if(operand->getType().totalSamplerRegisterCount() > 0) // Struct containing a sampler
2898 		{
2899 			samplerRegister(operand); // Make sure the sampler is declared
2900 		}
2901 
2902 		switch(operand->getQualifier())
2903 		{
2904 		case EvqTemporary:           return temporaryRegister(operand);
2905 		case EvqGlobal:              return temporaryRegister(operand);
2906 		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
2907 		case EvqAttribute:           return attributeRegister(operand);
2908 		case EvqVaryingIn:           return varyingRegister(operand);
2909 		case EvqVaryingOut:          return varyingRegister(operand);
2910 		case EvqVertexIn:            return attributeRegister(operand);
2911 		case EvqFragmentOut:         return fragmentOutputRegister(operand);
2912 		case EvqVertexOut:           return varyingRegister(operand);
2913 		case EvqFragmentIn:          return varyingRegister(operand);
2914 		case EvqInvariantVaryingIn:  return varyingRegister(operand);
2915 		case EvqInvariantVaryingOut: return varyingRegister(operand);
2916 		case EvqSmooth:              return varyingRegister(operand);
2917 		case EvqFlat:                return varyingRegister(operand);
2918 		case EvqCentroidOut:         return varyingRegister(operand);
2919 		case EvqSmoothIn:            return varyingRegister(operand);
2920 		case EvqFlatIn:              return varyingRegister(operand);
2921 		case EvqCentroidIn:          return varyingRegister(operand);
2922 		case EvqUniform:             return uniformRegister(operand);
2923 		case EvqIn:                  return temporaryRegister(operand);
2924 		case EvqOut:                 return temporaryRegister(operand);
2925 		case EvqInOut:               return temporaryRegister(operand);
2926 		case EvqConstReadOnly:       return temporaryRegister(operand);
2927 		case EvqPosition:            return varyingRegister(operand);
2928 		case EvqPointSize:           return varyingRegister(operand);
2929 		case EvqInstanceID:          vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
2930 		case EvqVertexID:            vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
2931 		case EvqFragCoord:           pixelShader->declareVPos();  return sw::Shader::VPosIndex;
2932 		case EvqFrontFacing:         pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
2933 		case EvqPointCoord:          return varyingRegister(operand);
2934 		case EvqFragColor:           return 0;
2935 		case EvqFragData:            return fragmentOutputRegister(operand);
2936 		case EvqFragDepth:           return 0;
2937 		default: UNREACHABLE(operand->getQualifier());
2938 		}
2939 
2940 		return 0;
2941 	}
2942 
writeMask(TIntermTyped * destination,int index)2943 	int OutputASM::writeMask(TIntermTyped *destination, int index)
2944 	{
2945 		if(destination->getQualifier() == EvqPointSize)
2946 		{
2947 			return 0x2;   // Point size stored in the y component
2948 		}
2949 
2950 		return 0xF >> (4 - registerSize(destination->getType(), index));
2951 	}
2952 
readSwizzle(TIntermTyped * argument,int size)2953 	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
2954 	{
2955 		if(argument->getQualifier() == EvqPointSize)
2956 		{
2957 			return 0x55;   // Point size stored in the y component
2958 		}
2959 
2960 		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
2961 
2962 		return swizzleSize[size];
2963 	}
2964 
2965 	// Conservatively checks whether an expression is fast to compute and has no side effects
trivial(TIntermTyped * expression,int budget)2966 	bool OutputASM::trivial(TIntermTyped *expression, int budget)
2967 	{
2968 		if(!expression->isRegister())
2969 		{
2970 			return false;
2971 		}
2972 
2973 		return cost(expression, budget) >= 0;
2974 	}
2975 
2976 	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
cost(TIntermNode * expression,int budget)2977 	int OutputASM::cost(TIntermNode *expression, int budget)
2978 	{
2979 		if(budget < 0)
2980 		{
2981 			return budget;
2982 		}
2983 
2984 		if(expression->getAsSymbolNode())
2985 		{
2986 			return budget;
2987 		}
2988 		else if(expression->getAsConstantUnion())
2989 		{
2990 			return budget;
2991 		}
2992 		else if(expression->getAsBinaryNode())
2993 		{
2994 			TIntermBinary *binary = expression->getAsBinaryNode();
2995 
2996 			switch(binary->getOp())
2997 			{
2998 			case EOpVectorSwizzle:
2999 			case EOpIndexDirect:
3000 			case EOpIndexDirectStruct:
3001 			case EOpIndexDirectInterfaceBlock:
3002 				return cost(binary->getLeft(), budget - 0);
3003 			case EOpAdd:
3004 			case EOpSub:
3005 			case EOpMul:
3006 				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
3007 			default:
3008 				return -1;
3009 			}
3010 		}
3011 		else if(expression->getAsUnaryNode())
3012 		{
3013 			TIntermUnary *unary = expression->getAsUnaryNode();
3014 
3015 			switch(unary->getOp())
3016 			{
3017 			case EOpAbs:
3018 			case EOpNegative:
3019 				return cost(unary->getOperand(), budget - 1);
3020 			default:
3021 				return -1;
3022 			}
3023 		}
3024 		else if(expression->getAsSelectionNode())
3025 		{
3026 			TIntermSelection *selection = expression->getAsSelectionNode();
3027 
3028 			if(selection->usesTernaryOperator())
3029 			{
3030 				TIntermTyped *condition = selection->getCondition();
3031 				TIntermNode *trueBlock = selection->getTrueBlock();
3032 				TIntermNode *falseBlock = selection->getFalseBlock();
3033 				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
3034 
3035 				if(constantCondition)
3036 				{
3037 					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
3038 
3039 					if(trueCondition)
3040 					{
3041 						return cost(trueBlock, budget - 0);
3042 					}
3043 					else
3044 					{
3045 						return cost(falseBlock, budget - 0);
3046 					}
3047 				}
3048 				else
3049 				{
3050 					return cost(trueBlock, cost(falseBlock, budget - 2));
3051 				}
3052 			}
3053 		}
3054 
3055 		return -1;
3056 	}
3057 
findFunction(const TString & name)3058 	const Function *OutputASM::findFunction(const TString &name)
3059 	{
3060 		for(unsigned int f = 0; f < functionArray.size(); f++)
3061 		{
3062 			if(functionArray[f].name == name)
3063 			{
3064 				return &functionArray[f];
3065 			}
3066 		}
3067 
3068 		return 0;
3069 	}
3070 
temporaryRegister(TIntermTyped * temporary)3071 	int OutputASM::temporaryRegister(TIntermTyped *temporary)
3072 	{
3073 		int index = allocate(temporaries, temporary);
3074 		if(index >= sw::NUM_TEMPORARY_REGISTERS)
3075 		{
3076 			mContext.error(temporary->getLine(),
3077 				"Too many temporary registers required to compile shader",
3078 				pixelShader ? "pixel shader" : "vertex shader");
3079 		}
3080 		return index;
3081 	}
3082 
setPixelShaderInputs(const TType & type,int var,bool flat)3083 	void OutputASM::setPixelShaderInputs(const TType& type, int var, bool flat)
3084 	{
3085 		if(type.isStruct())
3086 		{
3087 			const TFieldList &fields = type.getStruct()->fields();
3088 			int fieldVar = var;
3089 			for(const auto &field : fields)
3090 			{
3091 				const TType& fieldType = *(field->type());
3092 				setPixelShaderInputs(fieldType, fieldVar, flat);
3093 				fieldVar += fieldType.totalRegisterCount();
3094 			}
3095 		}
3096 		else
3097 		{
3098 			for(int i = 0; i < type.totalRegisterCount(); i++)
3099 			{
3100 				pixelShader->setInput(var + i, type.registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat));
3101 			}
3102 		}
3103 	}
3104 
varyingRegister(TIntermTyped * varying)3105 	int OutputASM::varyingRegister(TIntermTyped *varying)
3106 	{
3107 		int var = lookup(varyings, varying);
3108 
3109 		if(var == -1)
3110 		{
3111 			var = allocate(varyings, varying);
3112 			int registerCount = varying->totalRegisterCount();
3113 
3114 			if(pixelShader)
3115 			{
3116 				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
3117 				{
3118 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
3119 					return 0;
3120 				}
3121 
3122 				if(varying->getQualifier() == EvqPointCoord)
3123 				{
3124 					ASSERT(varying->isRegister());
3125 					pixelShader->setInput(var, varying->registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var));
3126 				}
3127 				else
3128 				{
3129 					setPixelShaderInputs(varying->getType(), var, hasFlatQualifier(varying));
3130 				}
3131 			}
3132 			else if(vertexShader)
3133 			{
3134 				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
3135 				{
3136 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
3137 					return 0;
3138 				}
3139 
3140 				if(varying->getQualifier() == EvqPosition)
3141 				{
3142 					ASSERT(varying->isRegister());
3143 					vertexShader->setPositionRegister(var);
3144 				}
3145 				else if(varying->getQualifier() == EvqPointSize)
3146 				{
3147 					ASSERT(varying->isRegister());
3148 					vertexShader->setPointSizeRegister(var);
3149 				}
3150 				else
3151 				{
3152 					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
3153 				}
3154 			}
3155 			else UNREACHABLE(0);
3156 
3157 			declareVarying(varying, var);
3158 		}
3159 
3160 		return var;
3161 	}
3162 
declareVarying(TIntermTyped * varying,int reg)3163 	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
3164 	{
3165 		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
3166 		{
3167 			TIntermSymbol *symbol = varying->getAsSymbolNode();
3168 			declareVarying(varying->getType(), symbol->getSymbol(), reg);
3169 		}
3170 	}
3171 
declareVarying(const TType & type,const TString & varyingName,int registerIndex)3172 	void OutputASM::declareVarying(const TType &type, const TString &varyingName, int registerIndex)
3173 	{
3174 		const char *name = varyingName.c_str();
3175 		VaryingList &activeVaryings = shaderObject->varyings;
3176 
3177 		TStructure* structure = type.getStruct();
3178 		if(structure)
3179 		{
3180 			int fieldRegisterIndex = registerIndex;
3181 
3182 			const TFieldList &fields = type.getStruct()->fields();
3183 			for(const auto &field : fields)
3184 			{
3185 				const TType& fieldType = *(field->type());
3186 				declareVarying(fieldType, varyingName + "." + field->name(), fieldRegisterIndex);
3187 				if(fieldRegisterIndex >= 0)
3188 				{
3189 					fieldRegisterIndex += fieldType.totalRegisterCount();
3190 				}
3191 			}
3192 		}
3193 		else
3194 		{
3195 			// Check if this varying has been declared before without having a register assigned
3196 			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
3197 			{
3198 				if(v->name == name)
3199 				{
3200 					if(registerIndex >= 0)
3201 					{
3202 						ASSERT(v->registerIndex < 0 || v->registerIndex == registerIndex);
3203 						v->registerIndex = registerIndex;
3204 					}
3205 
3206 					return;
3207 				}
3208 			}
3209 
3210 			activeVaryings.push_back(glsl::Varying(type, name, registerIndex, 0));
3211 		}
3212 	}
3213 
declareFragmentOutput(TIntermTyped * fragmentOutput)3214 	void OutputASM::declareFragmentOutput(TIntermTyped *fragmentOutput)
3215 	{
3216 		int requestedLocation = fragmentOutput->getType().getLayoutQualifier().location;
3217 		int registerCount = fragmentOutput->totalRegisterCount();
3218 		if(requestedLocation < 0)
3219 		{
3220 			ASSERT(requestedLocation == -1); // All other negative values would have been prevented in TParseContext::parseLayoutQualifier
3221 			return; // No requested location
3222 		}
3223 		else if((requestedLocation + registerCount) > sw::RENDERTARGETS)
3224 		{
3225 			mContext.error(fragmentOutput->getLine(), "Fragment output location larger or equal to MAX_DRAW_BUFFERS", "fragment shader");
3226 		}
3227 		else
3228 		{
3229 			int currentIndex = lookup(fragmentOutputs, fragmentOutput);
3230 			if(requestedLocation != currentIndex)
3231 			{
3232 				if(currentIndex != -1)
3233 				{
3234 					mContext.error(fragmentOutput->getLine(), "Multiple locations for fragment output", "fragment shader");
3235 				}
3236 				else
3237 				{
3238 					if(fragmentOutputs.size() <= (size_t)requestedLocation)
3239 					{
3240 						while(fragmentOutputs.size() < (size_t)requestedLocation)
3241 						{
3242 							fragmentOutputs.push_back(nullptr);
3243 						}
3244 						for(int i = 0; i < registerCount; i++)
3245 						{
3246 							fragmentOutputs.push_back(fragmentOutput);
3247 						}
3248 					}
3249 					else
3250 					{
3251 						for(int i = 0; i < registerCount; i++)
3252 						{
3253 							if(!fragmentOutputs[requestedLocation + i])
3254 							{
3255 								fragmentOutputs[requestedLocation + i] = fragmentOutput;
3256 							}
3257 							else
3258 							{
3259 								mContext.error(fragmentOutput->getLine(), "Fragment output location aliasing", "fragment shader");
3260 								return;
3261 							}
3262 						}
3263 					}
3264 				}
3265 			}
3266 		}
3267 	}
3268 
uniformRegister(TIntermTyped * uniform)3269 	int OutputASM::uniformRegister(TIntermTyped *uniform)
3270 	{
3271 		const TType &type = uniform->getType();
3272 		ASSERT(!IsSampler(type.getBasicType()));
3273 		TInterfaceBlock *block = type.getAsInterfaceBlock();
3274 		TIntermSymbol *symbol = uniform->getAsSymbolNode();
3275 		ASSERT(symbol || block);
3276 
3277 		if(symbol || block)
3278 		{
3279 			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
3280 			bool isBlockMember = (!block && parentBlock);
3281 			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
3282 
3283 			if(index == -1 || isBlockMember)
3284 			{
3285 				if(index == -1)
3286 				{
3287 					index = allocate(uniforms, uniform);
3288 				}
3289 
3290 				// Verify if the current uniform is a member of an already declared block
3291 				const TString &name = symbol ? symbol->getSymbol() : block->name();
3292 				int blockMemberIndex = blockMemberLookup(type, name, index);
3293 				if(blockMemberIndex == -1)
3294 				{
3295 					declareUniform(type, name, index, false);
3296 				}
3297 				else
3298 				{
3299 					index = blockMemberIndex;
3300 				}
3301 			}
3302 
3303 			return index;
3304 		}
3305 
3306 		return 0;
3307 	}
3308 
attributeRegister(TIntermTyped * attribute)3309 	int OutputASM::attributeRegister(TIntermTyped *attribute)
3310 	{
3311 		ASSERT(!attribute->isArray());
3312 
3313 		int index = lookup(attributes, attribute);
3314 
3315 		if(index == -1)
3316 		{
3317 			TIntermSymbol *symbol = attribute->getAsSymbolNode();
3318 			ASSERT(symbol);
3319 
3320 			if(symbol)
3321 			{
3322 				index = allocate(attributes, attribute);
3323 				const TType &type = attribute->getType();
3324 				int registerCount = attribute->totalRegisterCount();
3325 				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
3326 				switch(type.getBasicType())
3327 				{
3328 				case EbtInt:
3329 					attribType = sw::VertexShader::ATTRIBTYPE_INT;
3330 					break;
3331 				case EbtUInt:
3332 					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
3333 					break;
3334 				case EbtFloat:
3335 				default:
3336 					break;
3337 				}
3338 
3339 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
3340 				{
3341 					for(int i = 0; i < registerCount; i++)
3342 					{
3343 						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
3344 					}
3345 				}
3346 
3347 				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
3348 
3349 				const char *name = symbol->getSymbol().c_str();
3350 				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
3351 			}
3352 		}
3353 
3354 		return index;
3355 	}
3356 
fragmentOutputRegister(TIntermTyped * fragmentOutput)3357 	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
3358 	{
3359 		return allocate(fragmentOutputs, fragmentOutput);
3360 	}
3361 
samplerRegister(TIntermTyped * sampler)3362 	int OutputASM::samplerRegister(TIntermTyped *sampler)
3363 	{
3364 		const TType &type = sampler->getType();
3365 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3366 
3367 		TIntermSymbol *symbol = sampler->getAsSymbolNode();
3368 		TIntermBinary *binary = sampler->getAsBinaryNode();
3369 
3370 		if(symbol)
3371 		{
3372 			switch(type.getQualifier())
3373 			{
3374 			case EvqUniform:
3375 				return samplerRegister(symbol);
3376 			case EvqIn:
3377 			case EvqConstReadOnly:
3378 				// Function arguments are not (uniform) sampler registers
3379 				return -1;
3380 			default:
3381 				UNREACHABLE(type.getQualifier());
3382 			}
3383 		}
3384 		else if(binary)
3385 		{
3386 			TIntermTyped *left = binary->getLeft();
3387 			TIntermTyped *right = binary->getRight();
3388 			const TType &leftType = left->getType();
3389 			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
3390 			int offset = 0;
3391 
3392 			switch(binary->getOp())
3393 			{
3394 			case EOpIndexDirect:
3395 				ASSERT(left->isArray());
3396 				offset = index * leftType.samplerRegisterCount();
3397 				break;
3398 			case EOpIndexDirectStruct:
3399 				ASSERT(leftType.isStruct());
3400 				{
3401 					const TFieldList &fields = leftType.getStruct()->fields();
3402 
3403 					for(int i = 0; i < index; i++)
3404 					{
3405 						offset += fields[i]->type()->totalSamplerRegisterCount();
3406 					}
3407 				}
3408 				break;
3409 			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
3410 				return -1;
3411 			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
3412 			default:
3413 				UNREACHABLE(binary->getOp());
3414 				return -1;
3415 			}
3416 
3417 			int base = samplerRegister(left);
3418 
3419 			if(base < 0)
3420 			{
3421 				return -1;
3422 			}
3423 
3424 			return base + offset;
3425 		}
3426 
3427 		UNREACHABLE(0);
3428 		return -1;   // Not a (uniform) sampler register
3429 	}
3430 
samplerRegister(TIntermSymbol * sampler)3431 	int OutputASM::samplerRegister(TIntermSymbol *sampler)
3432 	{
3433 		const TType &type = sampler->getType();
3434 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3435 
3436 		int index = lookup(samplers, sampler);
3437 
3438 		if(index == -1)
3439 		{
3440 			index = allocate(samplers, sampler, true);
3441 
3442 			if(sampler->getQualifier() == EvqUniform)
3443 			{
3444 				const char *name = sampler->getSymbol().c_str();
3445 				declareUniform(type, name, index, true);
3446 			}
3447 		}
3448 
3449 		return index;
3450 	}
3451 
isSamplerRegister(TIntermTyped * operand)3452 	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
3453 	{
3454 		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
3455 	}
3456 
lookup(VariableArray & list,TIntermTyped * variable)3457 	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
3458 	{
3459 		for(unsigned int i = 0; i < list.size(); i++)
3460 		{
3461 			if(list[i] == variable)
3462 			{
3463 				return i;   // Pointer match
3464 			}
3465 		}
3466 
3467 		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
3468 		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
3469 
3470 		if(varBlock)
3471 		{
3472 			for(unsigned int i = 0; i < list.size(); i++)
3473 			{
3474 				if(list[i])
3475 				{
3476 					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
3477 
3478 					if(listBlock)
3479 					{
3480 						if(listBlock->name() == varBlock->name())
3481 						{
3482 							ASSERT(listBlock->arraySize() == varBlock->arraySize());
3483 							ASSERT(listBlock->fields() == varBlock->fields());
3484 							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
3485 							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
3486 
3487 							return i;
3488 						}
3489 					}
3490 				}
3491 			}
3492 		}
3493 		else if(varSymbol)
3494 		{
3495 			for(unsigned int i = 0; i < list.size(); i++)
3496 			{
3497 				if(list[i])
3498 				{
3499 					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
3500 
3501 					if(listSymbol)
3502 					{
3503 						if(listSymbol->getId() == varSymbol->getId())
3504 						{
3505 							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
3506 							ASSERT(listSymbol->getType() == varSymbol->getType());
3507 							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
3508 
3509 							return i;
3510 						}
3511 					}
3512 				}
3513 			}
3514 		}
3515 
3516 		return -1;
3517 	}
3518 
lookup(VariableArray & list,TInterfaceBlock * block)3519 	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
3520 	{
3521 		for(unsigned int i = 0; i < list.size(); i++)
3522 		{
3523 			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
3524 			{
3525 				return i;   // Pointer match
3526 			}
3527 		}
3528 		return -1;
3529 	}
3530 
allocate(VariableArray & list,TIntermTyped * variable,bool samplersOnly)3531 	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable, bool samplersOnly)
3532 	{
3533 		int index = lookup(list, variable);
3534 
3535 		if(index == -1)
3536 		{
3537 			unsigned int registerCount = variable->blockRegisterCount(samplersOnly);
3538 
3539 			for(unsigned int i = 0; i < list.size(); i++)
3540 			{
3541 				if(list[i] == 0)
3542 				{
3543 					unsigned int j = 1;
3544 					for( ; j < registerCount && (i + j) < list.size(); j++)
3545 					{
3546 						if(list[i + j] != 0)
3547 						{
3548 							break;
3549 						}
3550 					}
3551 
3552 					if(j == registerCount)   // Found free slots
3553 					{
3554 						for(unsigned int j = 0; j < registerCount; j++)
3555 						{
3556 							list[i + j] = variable;
3557 						}
3558 
3559 						return i;
3560 					}
3561 				}
3562 			}
3563 
3564 			index = list.size();
3565 
3566 			for(unsigned int i = 0; i < registerCount; i++)
3567 			{
3568 				list.push_back(variable);
3569 			}
3570 		}
3571 
3572 		return index;
3573 	}
3574 
free(VariableArray & list,TIntermTyped * variable)3575 	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
3576 	{
3577 		int index = lookup(list, variable);
3578 
3579 		if(index >= 0)
3580 		{
3581 			list[index] = 0;
3582 		}
3583 	}
3584 
blockMemberLookup(const TType & type,const TString & name,int registerIndex)3585 	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
3586 	{
3587 		const TInterfaceBlock *block = type.getInterfaceBlock();
3588 
3589 		if(block)
3590 		{
3591 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3592 			const TFieldList& fields = block->fields();
3593 			const TString &blockName = block->name();
3594 			int fieldRegisterIndex = registerIndex;
3595 
3596 			if(!type.isInterfaceBlock())
3597 			{
3598 				// This is a uniform that's part of a block, let's see if the block is already defined
3599 				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
3600 				{
3601 					if(activeUniformBlocks[i].name == blockName.c_str())
3602 					{
3603 						// The block is already defined, find the register for the current uniform and return it
3604 						for(size_t j = 0; j < fields.size(); j++)
3605 						{
3606 							const TString &fieldName = fields[j]->name();
3607 							if(fieldName == name)
3608 							{
3609 								return fieldRegisterIndex;
3610 							}
3611 
3612 							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
3613 						}
3614 
3615 						ASSERT(false);
3616 						return fieldRegisterIndex;
3617 					}
3618 				}
3619 			}
3620 		}
3621 
3622 		return -1;
3623 	}
3624 
declareUniform(const TType & type,const TString & name,int registerIndex,bool samplersOnly,int blockId,BlockLayoutEncoder * encoder)3625 	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, bool samplersOnly, int blockId, BlockLayoutEncoder* encoder)
3626 	{
3627 		const TStructure *structure = type.getStruct();
3628 		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;
3629 
3630 		if(!structure && !block)
3631 		{
3632 			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
3633 			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
3634 			if(blockId >= 0)
3635 			{
3636 				blockDefinitions[blockId].insert(BlockDefinitionIndexMap::value_type(registerIndex, TypedMemberInfo(blockInfo, type)));
3637 				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
3638 			}
3639 			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
3640 			bool isSampler = IsSampler(type.getBasicType());
3641 			if(isSampler && samplersOnly)
3642 			{
3643 				for(int i = 0; i < type.totalRegisterCount(); i++)
3644 				{
3645 					shader->declareSampler(fieldRegisterIndex + i);
3646 				}
3647 			}
3648 			if(isSampler == samplersOnly)
3649 			{
3650 				activeUniforms.push_back(Uniform(type, name.c_str(), fieldRegisterIndex, blockId, blockInfo));
3651 			}
3652 		}
3653 		else if(block)
3654 		{
3655 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3656 			const TFieldList& fields = block->fields();
3657 			const TString &blockName = block->name();
3658 			int fieldRegisterIndex = registerIndex;
3659 			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);
3660 
3661 			blockId = activeUniformBlocks.size();
3662 			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
3663 			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
3664 			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
3665 			blockDefinitions.push_back(BlockDefinitionIndexMap());
3666 
3667 			Std140BlockEncoder currentBlockEncoder;
3668 			currentBlockEncoder.enterAggregateType();
3669 			for(const auto &field : fields)
3670 			{
3671 				const TType &fieldType = *(field->type());
3672 				const TString &fieldName = field->name();
3673 				if(isUniformBlockMember && (fieldName == name))
3674 				{
3675 					registerIndex = fieldRegisterIndex;
3676 				}
3677 
3678 				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;
3679 
3680 				declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, &currentBlockEncoder);
3681 				fieldRegisterIndex += fieldType.totalRegisterCount();
3682 			}
3683 			currentBlockEncoder.exitAggregateType();
3684 			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
3685 		}
3686 		else
3687 		{
3688 			// Store struct for program link time validation
3689 			shaderObject->activeUniformStructs.push_back(Uniform(type, name.c_str(), registerIndex, -1, BlockMemberInfo::getDefaultBlockInfo()));
3690 
3691 			int fieldRegisterIndex = registerIndex;
3692 
3693 			const TFieldList& fields = structure->fields();
3694 			if(type.isArray() && (structure || type.isInterfaceBlock()))
3695 			{
3696 				for(int i = 0; i < type.getArraySize(); i++)
3697 				{
3698 					if(encoder)
3699 					{
3700 						encoder->enterAggregateType();
3701 					}
3702 					for(const auto &field : fields)
3703 					{
3704 						const TType &fieldType = *(field->type());
3705 						const TString &fieldName = field->name();
3706 						const TString uniformName = name + "[" + str(i) + "]." + fieldName;
3707 
3708 						declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
3709 						fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
3710 					}
3711 					if(encoder)
3712 					{
3713 						encoder->exitAggregateType();
3714 					}
3715 				}
3716 			}
3717 			else
3718 			{
3719 				if(encoder)
3720 				{
3721 					encoder->enterAggregateType();
3722 				}
3723 				for(const auto &field : fields)
3724 				{
3725 					const TType &fieldType = *(field->type());
3726 					const TString &fieldName = field->name();
3727 					const TString uniformName = name + "." + fieldName;
3728 
3729 					declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
3730 					fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
3731 				}
3732 				if(encoder)
3733 				{
3734 					encoder->exitAggregateType();
3735 				}
3736 			}
3737 		}
3738 	}
3739 
dim(TIntermNode * v)3740 	int OutputASM::dim(TIntermNode *v)
3741 	{
3742 		TIntermTyped *vector = v->getAsTyped();
3743 		ASSERT(vector && vector->isRegister());
3744 		return vector->getNominalSize();
3745 	}
3746 
dim2(TIntermNode * m)3747 	int OutputASM::dim2(TIntermNode *m)
3748 	{
3749 		TIntermTyped *matrix = m->getAsTyped();
3750 		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
3751 		return matrix->getSecondarySize();
3752 	}
3753 
3754 	// Sets iterations to ~0u if no loop count could be statically determined.
LoopInfo(TIntermLoop * node)3755 	OutputASM::LoopInfo::LoopInfo(TIntermLoop *node)
3756 	{
3757 		// Parse loops of the form:
3758 		// for(int index = initial; index [comparator] limit; index [op] increment)
3759 
3760 		// Parse index name and intial value
3761 		if(node->getInit())
3762 		{
3763 			TIntermAggregate *init = node->getInit()->getAsAggregate();
3764 
3765 			if(init)
3766 			{
3767 				TIntermSequence &sequence = init->getSequence();
3768 				TIntermTyped *variable = sequence[0]->getAsTyped();
3769 
3770 				if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt)
3771 				{
3772 					TIntermBinary *assign = variable->getAsBinaryNode();
3773 
3774 					if(assign && assign->getOp() == EOpInitialize)
3775 					{
3776 						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
3777 						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();
3778 
3779 						if(symbol && constant)
3780 						{
3781 							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3782 							{
3783 								index = symbol;
3784 								initial = constant->getUnionArrayPointer()[0].getIConst();
3785 							}
3786 						}
3787 					}
3788 				}
3789 			}
3790 		}
3791 
3792 		// Parse comparator and limit value
3793 		if(index && node->getCondition())
3794 		{
3795 			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
3796 			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;
3797 
3798 			if(left && (left->getId() == index->getId()))
3799 			{
3800 				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();
3801 
3802 				if(constant)
3803 				{
3804 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3805 					{
3806 						comparator = test->getOp();
3807 						limit = constant->getUnionArrayPointer()[0].getIConst();
3808 					}
3809 				}
3810 			}
3811 		}
3812 
3813 		// Parse increment
3814 		if(index && comparator != EOpNull && node->getExpression())
3815 		{
3816 			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
3817 			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();
3818 
3819 			if(binaryTerminal)
3820 			{
3821 				TIntermSymbol *operand = binaryTerminal->getLeft()->getAsSymbolNode();
3822 
3823 				if(operand && operand->getId() == index->getId())
3824 				{
3825 					TOperator op = binaryTerminal->getOp();
3826 					TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();
3827 
3828 					if(constant)
3829 					{
3830 						if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3831 						{
3832 							int value = constant->getUnionArrayPointer()[0].getIConst();
3833 
3834 							switch(op)
3835 							{
3836 							case EOpAddAssign: increment = value;  break;
3837 							case EOpSubAssign: increment = -value; break;
3838 							default:           increment = 0;      break;   // Rare cases left unhandled. Treated as non-deterministic.
3839 							}
3840 						}
3841 					}
3842 				}
3843 			}
3844 			else if(unaryTerminal)
3845 			{
3846 				TIntermSymbol *operand = unaryTerminal->getOperand()->getAsSymbolNode();
3847 
3848 				if(operand && operand->getId() == index->getId())
3849 				{
3850 					TOperator op = unaryTerminal->getOp();
3851 
3852 					switch(op)
3853 					{
3854 					case EOpPostIncrement: increment = 1;  break;
3855 					case EOpPostDecrement: increment = -1; break;
3856 					case EOpPreIncrement:  increment = 1;  break;
3857 					case EOpPreDecrement:  increment = -1; break;
3858 					default:               increment = 0;  break;   // Rare cases left unhandled. Treated as non-deterministic.
3859 					}
3860 				}
3861 			}
3862 		}
3863 
3864 		if(index && comparator != EOpNull && increment != 0)
3865 		{
3866 			// Check the loop body for return statements or changes to the index variable that make it non-deterministic.
3867 			LoopUnrollable loopUnrollable;
3868 			bool unrollable = loopUnrollable.traverse(node, index->getId());
3869 
3870 			if(!unrollable)
3871 			{
3872 				iterations = ~0u;
3873 				return;
3874 			}
3875 
3876 			if(comparator == EOpLessThanEqual)
3877 			{
3878 				comparator = EOpLessThan;
3879 				limit += 1;
3880 			}
3881 			else if(comparator == EOpGreaterThanEqual)
3882 			{
3883 				comparator = EOpLessThan;
3884 				limit -= 1;
3885 				std::swap(initial, limit);
3886 				increment = -increment;
3887 			}
3888 			else if(comparator == EOpGreaterThan)
3889 			{
3890 				comparator = EOpLessThan;
3891 				std::swap(initial, limit);
3892 				increment = -increment;
3893 			}
3894 
3895 			if(comparator == EOpLessThan)
3896 			{
3897 				if(!(initial < limit))   // Never loops
3898 				{
3899 					iterations = 0;
3900 				}
3901 				else if(increment < 0)
3902 				{
3903 					iterations = ~0u;
3904 				}
3905 				else
3906 				{
3907 					iterations = (limit - initial + abs(increment) - 1) / increment;   // Ceiling division
3908 				}
3909 			}
3910 			else
3911 			{
3912 				// Rare cases left unhandled. Treated as non-deterministic.
3913 				iterations = ~0u;
3914 			}
3915 		}
3916 	}
3917 
traverse(TIntermLoop * loop,int indexId)3918 	bool LoopUnrollable::traverse(TIntermLoop *loop, int indexId)
3919 	{
3920 		loopUnrollable = true;
3921 
3922 		loopIndexId = indexId;
3923 		TIntermNode *body = loop->getBody();
3924 
3925 		if(body)
3926 		{
3927 			body->traverse(this);
3928 		}
3929 
3930 		return loopUnrollable;
3931 	}
3932 
visitSymbol(TIntermSymbol * node)3933 	void LoopUnrollable::visitSymbol(TIntermSymbol *node)
3934 	{
3935 		// Check that the loop index is not used as the argument to a function out or inout parameter.
3936 		if(node->getId() == loopIndexId)
3937 		{
3938 			if(node->getQualifier() == EvqOut || node->getQualifier() == EvqInOut)
3939 			{
3940 				loopUnrollable = false;
3941 			}
3942 		}
3943 	}
3944 
visitBinary(Visit visit,TIntermBinary * node)3945 	bool LoopUnrollable::visitBinary(Visit visit, TIntermBinary *node)
3946 	{
3947 		if(!loopUnrollable)
3948 		{
3949 			return false;
3950 		}
3951 
3952 		// Check that the loop index is not statically assigned to.
3953 		TIntermSymbol *symbol = node->getLeft()->getAsSymbolNode();
3954 		loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId));
3955 
3956 		return loopUnrollable;
3957 	}
3958 
visitUnary(Visit visit,TIntermUnary * node)3959 	bool LoopUnrollable::visitUnary(Visit visit, TIntermUnary *node)
3960 	{
3961 		if(!loopUnrollable)
3962 		{
3963 			return false;
3964 		}
3965 
3966 		// Check that the loop index is not statically assigned to.
3967 		TIntermSymbol *symbol = node->getOperand()->getAsSymbolNode();
3968 		loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId));
3969 
3970 		return loopUnrollable;
3971 	}
3972 
visitBranch(Visit visit,TIntermBranch * node)3973 	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
3974 	{
3975 		if(!loopUnrollable)
3976 		{
3977 			return false;
3978 		}
3979 
3980 		switch(node->getFlowOp())
3981 		{
3982 		case EOpKill:
3983 		case EOpReturn:
3984 		case EOpBreak:
3985 		case EOpContinue:
3986 			loopUnrollable = false;
3987 			break;
3988 		default: UNREACHABLE(node->getFlowOp());
3989 		}
3990 
3991 		return loopUnrollable;
3992 	}
3993 
	// Aggregate nodes are not inspected themselves; the verdict reached so far is returned
	// unchanged. NOTE(review): presumably the return value tells the traverser whether to keep
	// descending into the node's children - confirm against the traverser base class.
	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
	{
		return loopUnrollable;
	}
3998 }
3999