1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "OutputASM.h"
16 #include "Common/Math.hpp"
17 
18 #include "common/debug.h"
19 #include "InfoSink.h"
20 
21 #include "libGLESv2/Shader.h"
22 
23 #include <GLES2/gl2.h>
24 #include <GLES2/gl2ext.h>
25 #include <GLES3/gl3.h>
26 
27 namespace glsl
28 {
29 	// Integer to TString conversion
str(int i)30 	TString str(int i)
31 	{
32 		char buffer[20];
33 		sprintf(buffer, "%d", i);
34 		return buffer;
35 	}
36 
37 	class Temporary : public TIntermSymbol
38 	{
39 	public:
Temporary(OutputASM * assembler)40 		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
41 		{
42 		}
43 
~Temporary()44 		~Temporary()
45 		{
46 			assembler->freeTemporary(this);
47 		}
48 
49 	private:
50 		OutputASM *const assembler;
51 	};
52 
53 	class Constant : public TIntermConstantUnion
54 	{
55 	public:
Constant(float x,float y,float z,float w)56 		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
57 		{
58 			constants[0].setFConst(x);
59 			constants[1].setFConst(y);
60 			constants[2].setFConst(z);
61 			constants[3].setFConst(w);
62 		}
63 
Constant(bool b)64 		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
65 		{
66 			constants[0].setBConst(b);
67 		}
68 
Constant(int i)69 		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
70 		{
71 			constants[0].setIConst(i);
72 		}
73 
~Constant()74 		~Constant()
75 		{
76 		}
77 
78 	private:
79 		ConstantUnion constants[4];
80 	};
81 
Uniform(GLenum type,GLenum precision,const std::string & name,int arraySize,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)82 	Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
83 		type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
84 	{
85 	}
86 
UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)87 	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
88 	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
89 		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
90 		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
91 	{
92 	}
93 
BlockLayoutEncoder(bool rowMajor)94 	BlockLayoutEncoder::BlockLayoutEncoder(bool rowMajor)
95 		: mCurrentOffset(0), isRowMajor(rowMajor)
96 	{
97 	}
98 
encodeType(const TType & type)99 	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
100 	{
101 		int arrayStride;
102 		int matrixStride;
103 
104 		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
105 
106 		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
107 		                                 static_cast<int>(arrayStride * BytesPerComponent),
108 		                                 static_cast<int>(matrixStride * BytesPerComponent),
109 		                                 (matrixStride > 0) && isRowMajor);
110 
111 		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
112 
113 		return memberInfo;
114 	}
115 
116 	// static
getBlockRegister(const BlockMemberInfo & info)117 	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
118 	{
119 		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
120 	}
121 
122 	// static
getBlockRegisterElement(const BlockMemberInfo & info)123 	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
124 	{
125 		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
126 	}
127 
	// Aligns the current offset (counted in components) to a multiple of
	// ComponentsPerRegister using sw::align().
	void BlockLayoutEncoder::nextRegister()
	{
		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
	}
132 
Std140BlockEncoder(bool rowMajor)133 	Std140BlockEncoder::Std140BlockEncoder(bool rowMajor) : BlockLayoutEncoder(rowMajor)
134 	{
135 	}
136 
	// Called when encoding enters an aggregate: aligns the current offset to
	// a register boundary via nextRegister().
	void Std140BlockEncoder::enterAggregateType()
	{
		nextRegister();
	}
141 
	// Called when encoding leaves an aggregate: aligns the current offset to
	// a register boundary via nextRegister().
	void Std140BlockEncoder::exitAggregateType()
	{
		nextRegister();
	}
146 
getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)147 	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
148 	{
149 		size_t baseAlignment = 0;
150 		int matrixStride = 0;
151 		int arrayStride = 0;
152 
153 		if(type.isMatrix())
154 		{
155 			baseAlignment = ComponentsPerRegister;
156 			matrixStride = ComponentsPerRegister;
157 
158 			if(arraySize > 0)
159 			{
160 				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
161 				arrayStride = ComponentsPerRegister * numRegisters;
162 			}
163 		}
164 		else if(arraySize > 0)
165 		{
166 			baseAlignment = ComponentsPerRegister;
167 			arrayStride = ComponentsPerRegister;
168 		}
169 		else
170 		{
171 			const size_t numComponents = type.getElementSize();
172 			baseAlignment = (numComponents == 3 ? 4u : numComponents);
173 		}
174 
175 		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
176 
177 		*matrixStrideOut = matrixStride;
178 		*arrayStrideOut = arrayStride;
179 	}
180 
advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)181 	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
182 	{
183 		if(arraySize > 0)
184 		{
185 			mCurrentOffset += arrayStride * arraySize;
186 		}
187 		else if(type.isMatrix())
188 		{
189 			ASSERT(matrixStride == ComponentsPerRegister);
190 			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
191 			mCurrentOffset += ComponentsPerRegister * numRegisters;
192 		}
193 		else
194 		{
195 			mCurrentOffset += type.getElementSize();
196 		}
197 	}
198 
Attribute()199 	Attribute::Attribute()
200 	{
201 		type = GL_NONE;
202 		arraySize = 0;
203 		registerIndex = 0;
204 	}
205 
Attribute(GLenum type,const std::string & name,int arraySize,int location,int registerIndex)206 	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)
207 	{
208 		this->type = type;
209 		this->name = name;
210 		this->arraySize = arraySize;
211 		this->location = location;
212 		this->registerIndex = registerIndex;
213 	}
214 
	// Always returns a null pointer here.
	// NOTE(review): presumably overridden by the concrete pixel shader class — confirm.
	sw::PixelShader *Shader::getPixelShader() const
	{
		return 0;
	}
219 
	// Always returns a null pointer here.
	// NOTE(review): presumably overridden by the concrete vertex shader class — confirm.
	sw::VertexShader *Shader::getVertexShader() const
	{
		return 0;
	}
224 
TextureFunction(const TString & nodeName)225 	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
226 	{
227 		TString name = TFunction::unmangleName(nodeName);
228 
229 		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D")
230 		{
231 			method = IMPLICIT;
232 		}
233 		else if(name == "texture2DProj" || name == "textureProj")
234 		{
235 			method = IMPLICIT;
236 			proj = true;
237 		}
238 		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
239 		{
240 			method = LOD;
241 		}
242 		else if(name == "texture2DProjLod" || name == "textureProjLod")
243 		{
244 			method = LOD;
245 			proj = true;
246 		}
247 		else if(name == "textureSize")
248 		{
249 			method = SIZE;
250 		}
251 		else if(name == "textureOffset")
252 		{
253 			method = IMPLICIT;
254 			offset = true;
255 		}
256 		else if(name == "textureProjOffset")
257 		{
258 			method = IMPLICIT;
259 			offset = true;
260 			proj = true;
261 		}
262 		else if(name == "textureLodOffset")
263 		{
264 			method = LOD;
265 			offset = true;
266 		}
267 		else if(name == "textureProjLodOffset")
268 		{
269 			method = LOD;
270 			proj = true;
271 			offset = true;
272 		}
273 		else if(name == "texelFetch")
274 		{
275 			method = FETCH;
276 		}
277 		else if(name == "texelFetchOffset")
278 		{
279 			method = FETCH;
280 			offset = true;
281 		}
282 		else if(name == "textureGrad")
283 		{
284 			method = GRAD;
285 		}
286 		else if(name == "textureGradOffset")
287 		{
288 			method = GRAD;
289 			offset = true;
290 		}
291 		else if(name == "textureProjGrad")
292 		{
293 			method = GRAD;
294 			proj = true;
295 		}
296 		else if(name == "textureProjGradOffset")
297 		{
298 			method = GRAD;
299 			proj = true;
300 			offset = true;
301 		}
302 		else UNREACHABLE(0);
303 	}
304 
OutputASM(TParseContext & context,Shader * shaderObject)305 	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
306 	{
307 		shader = 0;
308 		pixelShader = 0;
309 		vertexShader = 0;
310 
311 		if(shaderObject)
312 		{
313 			shader = shaderObject->getShader();
314 			pixelShader = shaderObject->getPixelShader();
315 			vertexShader = shaderObject->getVertexShader();
316 		}
317 
318 		functionArray.push_back(Function(0, "main(", 0, 0));
319 		currentFunction = 0;
320 		outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData
321 	}
322 
	// Nothing to release explicitly in the destructor body.
	OutputASM::~OutputASM()
	{
	}
326 
output()327 	void OutputASM::output()
328 	{
329 		if(shader)
330 		{
331 			emitShader(GLOBAL);
332 
333 			if(functionArray.size() > 1)   // Only call main() when there are other functions
334 			{
335 				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
336 				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
337 				callMain->dst.index = 0;   // main()
338 
339 				emit(sw::Shader::OPCODE_RET);
340 			}
341 
342 			emitShader(FUNCTION);
343 		}
344 	}
345 
	// Traverses the whole parse tree once, emitting only code that belongs to
	// the requested scope. The visit callbacks compare currentScope against
	// emitScope to decide whether to emit; traversal always starts in GLOBAL.
	void OutputASM::emitShader(Scope scope)
	{
		emitScope = scope;
		currentScope = GLOBAL;
		mContext.getTreeRoot()->traverse(this);
	}
352 
	// Called from Temporary's destructor; passes the temporary to free() along
	// with the 'temporaries' table.
	// NOTE(review): presumably this makes its register available for reuse — confirm.
	void OutputASM::freeTemporary(Temporary *temporary)
	{
		free(temporaries, temporary);
	}
357 
getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const358 	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
359 	{
360 		TBasicType baseType = in->getType().getBasicType();
361 
362 		switch(op)
363 		{
364 		case sw::Shader::OPCODE_NEG:
365 			switch(baseType)
366 			{
367 			case EbtInt:
368 			case EbtUInt:
369 				return sw::Shader::OPCODE_INEG;
370 			case EbtFloat:
371 			default:
372 				return op;
373 			}
374 		case sw::Shader::OPCODE_ABS:
375 			switch(baseType)
376 			{
377 			case EbtInt:
378 				return sw::Shader::OPCODE_IABS;
379 			case EbtFloat:
380 			default:
381 				return op;
382 			}
383 		case sw::Shader::OPCODE_SGN:
384 			switch(baseType)
385 			{
386 			case EbtInt:
387 				return sw::Shader::OPCODE_ISGN;
388 			case EbtFloat:
389 			default:
390 				return op;
391 			}
392 		case sw::Shader::OPCODE_ADD:
393 			switch(baseType)
394 			{
395 			case EbtInt:
396 			case EbtUInt:
397 				return sw::Shader::OPCODE_IADD;
398 			case EbtFloat:
399 			default:
400 				return op;
401 			}
402 		case sw::Shader::OPCODE_SUB:
403 			switch(baseType)
404 			{
405 			case EbtInt:
406 			case EbtUInt:
407 				return sw::Shader::OPCODE_ISUB;
408 			case EbtFloat:
409 			default:
410 				return op;
411 			}
412 		case sw::Shader::OPCODE_MUL:
413 			switch(baseType)
414 			{
415 			case EbtInt:
416 			case EbtUInt:
417 				return sw::Shader::OPCODE_IMUL;
418 			case EbtFloat:
419 			default:
420 				return op;
421 			}
422 		case sw::Shader::OPCODE_DIV:
423 			switch(baseType)
424 			{
425 			case EbtInt:
426 				return sw::Shader::OPCODE_IDIV;
427 			case EbtUInt:
428 				return sw::Shader::OPCODE_UDIV;
429 			case EbtFloat:
430 			default:
431 				return op;
432 			}
433 		case sw::Shader::OPCODE_IMOD:
434 			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
435 		case sw::Shader::OPCODE_ISHR:
436 			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
437 		case sw::Shader::OPCODE_MIN:
438 			switch(baseType)
439 			{
440 			case EbtInt:
441 				return sw::Shader::OPCODE_IMIN;
442 			case EbtUInt:
443 				return sw::Shader::OPCODE_UMIN;
444 			case EbtFloat:
445 			default:
446 				return op;
447 			}
448 		case sw::Shader::OPCODE_MAX:
449 			switch(baseType)
450 			{
451 			case EbtInt:
452 				return sw::Shader::OPCODE_IMAX;
453 			case EbtUInt:
454 				return sw::Shader::OPCODE_UMAX;
455 			case EbtFloat:
456 			default:
457 				return op;
458 			}
459 		default:
460 			return op;
461 		}
462 	}
463 
visitSymbol(TIntermSymbol * symbol)464 	void OutputASM::visitSymbol(TIntermSymbol *symbol)
465 	{
466 		// Vertex varyings don't have to be actively used to successfully link
467 		// against pixel shaders that use them. So make sure they're declared.
468 		if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut)
469 		{
470 			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
471 			{
472 				declareVarying(symbol, -1);
473 			}
474 		}
475 
476 		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
477 		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
478 		// "All members of a named uniform block declared with a shared or std140 layout qualifier
479 		// are considered active, even if they are not referenced in any shader in the program.
480 		// The uniform block itself is also considered active, even if no member of the block is referenced."
481 		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
482 		{
483 			uniformRegister(symbol);
484 		}
485 	}
486 
	// Traversal callback for binary expression nodes: emits the instructions
	// that implement the node's operator, using the node itself as the
	// destination ('result').
	//
	// Returns false (children are not traversed, nothing is emitted) when the
	// node lies in a scope other than the one currently being emitted, or
	// when the node itself is a sampler register. Otherwise returns true.
	// Most operators emit on PostVisit only; the short-circuit forms of
	// || and && additionally emit on InVisit.
	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
	{
		if(currentScope != emitScope)
		{
			return false;
		}

		TIntermTyped *result = node;
		TIntermTyped *left = node->getLeft();
		TIntermTyped *right = node->getRight();
		const TType &leftType = left->getType();
		const TType &rightType = right->getType();
		const TType &resultType = node->getType();

		if(isSamplerRegister(result))
		{
			return false;   // Don't traverse, the register index is determined statically
		}

		switch(node->getOp())
		{
		case EOpAssign:
			if(visit == PostVisit)
			{
				// Store into the l-value, then make the value of the
				// assignment expression itself available in 'result'.
				assignLvalue(left, right);
				copy(result, right);
			}
			break;
		case EOpInitialize:
			if(visit == PostVisit)
			{
				copy(left, right);
			}
			break;
		case EOpMatrixTimesScalarAssign:
			if(visit == PostVisit)
			{
				// One multiply per matrix register, then write back to the l-value.
				for(int i = 0; i < leftType.getNominalSize(); i++)
				{
					emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
				}

				assignLvalue(left, result);
			}
			break;
		case EOpVectorTimesMatrixAssign:
			if(visit == PostVisit)
			{
				int size = leftType.getNominalSize();

				// Component i of the result is the dot product of the vector
				// with matrix register i; the mask restricts the write to
				// that single component.
				for(int i = 0; i < size; i++)
				{
					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
					dot->dst.mask = 1 << i;
				}

				assignLvalue(left, result);
			}
			break;
		case EOpMatrixTimesMatrixAssign:
			if(visit == PostVisit)
			{
				int dim = leftType.getNominalSize();

				// Result register i is accumulated as the sum over j of
				// left[j] * right[i].jjjj. Swizzles use two bits per lane, so
				// 0x00 replicates component 0 and j * 0x55 replicates component j.
				for(int i = 0; i < dim; i++)
				{
					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
					mul->src[1].swizzle = 0x00;

					for(int j = 1; j < dim; j++)
					{
						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
						mad->src[1].swizzle = j * 0x55;
					}
				}

				assignLvalue(left, result);
			}
			break;
		case EOpIndexDirect:
			if(visit == PostVisit)
			{
				// Constant index: the source register offset is known statically.
				int index = right->getAsConstantUnion()->getIConst(0);

				if(result->isMatrix() || result->isStruct() || result->isInterfaceBlock())
				{
					// Multi-register element of an array: copy the whole element.
					ASSERT(left->isArray());
					copy(result, left, index * left->elementRegisterCount());
				}
				else if(result->isRegister())
				{
					int srcIndex = 0;
					if(left->isRegister())
					{
						// Indexing a vector: same register, component picked by swizzle below.
						srcIndex = 0;
					}
					else if(left->isArray())
					{
						srcIndex = index * left->elementRegisterCount();
					}
					else if(left->isMatrix())
					{
						ASSERT(index < left->getNominalSize());   // FIXME: Report semantic error
						srcIndex = index;
					}
					else UNREACHABLE(0);

					Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, 0, left, srcIndex);

					if(left->isRegister())
					{
						mov->src[0].swizzle = index;
					}
				}
				else UNREACHABLE(0);
			}
			break;
		case EOpIndexIndirect:
			if(visit == PostVisit)
			{
				if(left->isArray() || left->isMatrix())
				{
					// Copy every register of the selected element. When the
					// source spans more than one register, the move is made
					// relative to the register holding the dynamic index,
					// scaled by the element's register count.
					for(int index = 0; index < result->totalRegisterCount(); index++)
					{
						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index, left, index);
						mov->dst.mask = writeMask(result, index);

						if(left->totalRegisterCount() > 1)
						{
							sw::Shader::SourceParameter relativeRegister;
							argument(relativeRegister, right);

							mov->src[0].rel.type = relativeRegister.type;
							mov->src[0].rel.index = relativeRegister.index;
							mov->src[0].rel.scale =	result->totalRegisterCount();
							mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
						}
					}
				}
				else if(left->isRegister())
				{
					// Dynamic component selection from a single register.
					emit(sw::Shader::OPCODE_EXTRACT, result, left, right);
				}
				else UNREACHABLE(0);
			}
			break;
		case EOpIndexDirectStruct:
		case EOpIndexDirectInterfaceBlock:
			if(visit == PostVisit)
			{
				ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock()));

				const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ?
				                           leftType.getStruct()->fields() :
				                           leftType.getInterfaceBlock()->fields();
				int index = right->getAsConstantUnion()->getIConst(0);
				int fieldOffset = 0;

				// The selected field starts after the registers of all preceding fields.
				for(int i = 0; i < index; i++)
				{
					fieldOffset += fields[i]->type()->totalRegisterCount();
				}

				copy(result, left, fieldOffset);
			}
			break;
		case EOpVectorSwizzle:
			if(visit == PostVisit)
			{
				int swizzle = 0;
				TIntermAggregate *components = right->getAsAggregate();

				if(components)
				{
					TIntermSequence &sequence = components->getSequence();
					int component = 0;

					// Pack the constant component selectors, two bits per
					// destination lane starting at the low bits.
					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
					{
						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();

						if(element)
						{
							int i = element->getUnionArrayPointer()[0].getIConst();
							swizzle |= i << (component * 2);
							component++;
						}
						else UNREACHABLE(0);
					}
				}
				else UNREACHABLE(0);

				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
				mov->src[0].swizzle = swizzle;
			}
			break;
		// Arithmetic, shift and bitwise operators. The *Assign forms also write
		// the l-value; getOpcode() picks the integer variant from the result type.
		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
		case EOpEqual:
			if(visit == PostVisit)
			{
				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);

				// Operands spanning several registers are equal only if every
				// register compares equal: AND the remaining comparisons in.
				for(int index = 1; index < left->totalRegisterCount(); index++)
				{
					Temporary equal(this);
					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
					emit(sw::Shader::OPCODE_AND, result, result, &equal);
				}
			}
			break;
		case EOpNotEqual:
			if(visit == PostVisit)
			{
				emitBinary(sw::Shader::OPCODE_NE, result, left, right);

				// Operands differ if any register differs: OR the remaining
				// comparisons in.
				for(int index = 1; index < left->totalRegisterCount(); index++)
				{
					Temporary notEqual(this);
					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
				}
			}
			break;
		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
		case EOpMatrixTimesScalar:
			if(visit == PostVisit)
			{
				// Either operand may be the matrix; multiply each of its
				// registers by the other operand.
				if(left->isMatrix())
				{
					for(int i = 0; i < leftType.getNominalSize(); i++)
					{
						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
					}
				}
				else if(right->isMatrix())
				{
					for(int i = 0; i < rightType.getNominalSize(); i++)
					{
						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
					}
				}
				else UNREACHABLE(0);
			}
			break;
		case EOpVectorTimesMatrix:
			if(visit == PostVisit)
			{
				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());

				// Component i of the result is the dot product of the vector
				// with matrix register i.
				int size = rightType.getNominalSize();
				for(int i = 0; i < size; i++)
				{
					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
					dot->dst.mask = 1 << i;
				}
			}
			break;
		case EOpMatrixTimesVector:
			if(visit == PostVisit)
			{
				// result = sum over i of left[i] * right.iiii, started with a
				// MUL and continued with MADs that accumulate into result.
				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
				mul->src[1].swizzle = 0x00;

				int size = rightType.getNominalSize();
				for(int i = 1; i < size; i++)
				{
					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
					mad->src[1].swizzle = i * 0x55;
				}
			}
			break;
		case EOpMatrixTimesMatrix:
			if(visit == PostVisit)
			{
				int dim = leftType.getNominalSize();

				// Same accumulation scheme as EOpMatrixTimesMatrixAssign, but
				// iterating over the right operand's registers and without
				// writing back to an l-value.
				int size = rightType.getNominalSize();
				for(int i = 0; i < size; i++)
				{
					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
					mul->src[1].swizzle = 0x00;

					for(int j = 1; j < dim; j++)
					{
						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
						mad->src[1].swizzle = j * 0x55;
					}
				}
			}
			break;
		case EOpLogicalOr:
			if(trivial(right, 6))
			{
				// Right operand is considered trivial by trivial(): evaluate
				// both sides unconditionally and combine them.
				if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_OR, result, left, right);
				}
			}
			else   // Short-circuit evaluation
			{
				if(visit == InVisit)
				{
					// result = left; the code emitted for the right operand is
					// wrapped in "if(!result)" so it only runs when needed.
					emit(sw::Shader::OPCODE_MOV, result, left);
					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
				}
				else if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_MOV, result, right);
					emit(sw::Shader::OPCODE_ENDIF);
				}
			}
			break;
		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
		case EOpLogicalAnd:
			if(trivial(right, 6))
			{
				// Right operand is considered trivial by trivial(): evaluate
				// both sides unconditionally and combine them.
				if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_AND, result, left, right);
				}
			}
			else   // Short-circuit evaluation
			{
				if(visit == InVisit)
				{
					// result = left; the code emitted for the right operand is
					// wrapped in "if(result)" so it only runs when needed.
					emit(sw::Shader::OPCODE_MOV, result, left);
					emit(sw::Shader::OPCODE_IF, 0, result);
				}
				else if(visit == PostVisit)
				{
					emit(sw::Shader::OPCODE_MOV, result, right);
					emit(sw::Shader::OPCODE_ENDIF);
				}
			}
			break;
		default: UNREACHABLE(node->getOp());
		}

		return true;
	}
852 
emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)853 	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
854 	{
855 		switch(size)
856 		{
857 		case 1: // Used for cofactor computation only
858 			{
859 				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
860 				bool isMov = (row == col);
861 				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
862 				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
863 				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
864 				mov->dst.mask = 1 << outRow;
865 			}
866 			break;
867 		case 2:
868 			{
869 				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
870 
871 				bool isCofactor = (col >= 0) && (row >= 0);
872 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
873 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
874 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
875 
876 				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
877 				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
878 				det->dst.mask = 1 << outRow;
879 			}
880 			break;
881 		case 3:
882 			{
883 				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
884 
885 				bool isCofactor = (col >= 0) && (row >= 0);
886 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
887 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
888 				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
889 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
890 
891 				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
892 				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
893 				det->dst.mask = 1 << outRow;
894 			}
895 			break;
896 		case 4:
897 			{
898 				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
899 				det->dst.mask = 1 << outRow;
900 			}
901 			break;
902 		default:
903 			UNREACHABLE(size);
904 			break;
905 		}
906 	}
907 
visitUnary(Visit visit,TIntermUnary * node)908 	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
909 	{
910 		if(currentScope != emitScope)
911 		{
912 			return false;
913 		}
914 
915 		TIntermTyped *result = node;
916 		TIntermTyped *arg = node->getOperand();
917 		TBasicType basicType = arg->getType().getBasicType();
918 
919 		union
920 		{
921 			float f;
922 			int i;
923 		} one_value;
924 
925 		if(basicType == EbtInt || basicType == EbtUInt)
926 		{
927 			one_value.i = 1;
928 		}
929 		else
930 		{
931 			one_value.f = 1.0f;
932 		}
933 
934 		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
935 		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
936 		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
937 
938 		switch(node->getOp())
939 		{
940 		case EOpNegative:
941 			if(visit == PostVisit)
942 			{
943 				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
944 				for(int index = 0; index < arg->totalRegisterCount(); index++)
945 				{
946 					emit(negOpcode, result, index, arg, index);
947 				}
948 			}
949 			break;
950 		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
951 		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
952 		case EOpPostIncrement:
953 			if(visit == PostVisit)
954 			{
955 				copy(result, arg);
956 
957 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
958 				for(int index = 0; index < arg->totalRegisterCount(); index++)
959 				{
960 					emit(addOpcode, arg, index, arg, index, &one);
961 				}
962 
963 				assignLvalue(arg, arg);
964 			}
965 			break;
966 		case EOpPostDecrement:
967 			if(visit == PostVisit)
968 			{
969 				copy(result, arg);
970 
971 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
972 				for(int index = 0; index < arg->totalRegisterCount(); index++)
973 				{
974 					emit(subOpcode, arg, index, arg, index, &one);
975 				}
976 
977 				assignLvalue(arg, arg);
978 			}
979 			break;
980 		case EOpPreIncrement:
981 			if(visit == PostVisit)
982 			{
983 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
984 				for(int index = 0; index < arg->totalRegisterCount(); index++)
985 				{
986 					emit(addOpcode, result, index, arg, index, &one);
987 				}
988 
989 				assignLvalue(arg, result);
990 			}
991 			break;
992 		case EOpPreDecrement:
993 			if(visit == PostVisit)
994 			{
995 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
996 				for(int index = 0; index < arg->totalRegisterCount(); index++)
997 				{
998 					emit(subOpcode, result, index, arg, index, &one);
999 				}
1000 
1001 				assignLvalue(arg, result);
1002 			}
1003 			break;
1004 		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
1005 		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
1006 		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
1007 		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
1008 		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
1009 		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
1010 		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
1011 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
1012 		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
1013 		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
1014 		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
1015 		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
1016 		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
1017 		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
1018 		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
1019 		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
1020 		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
1021 		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
1022 		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
1023 		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
1024 		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
1025 		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
1026 		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
1027 		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
1028 		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
1029 		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
1030 		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;
1031 		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
1032 		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
1033 		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
1034 		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
1035 		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
1036 		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
1037 		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
1038 		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
1039 		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
1040 		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
1041 		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
1042 		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
1043 		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
1044 		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
1045 		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
1046 		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
1047 		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
1048 		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
1049 		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
1050 		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
1051 		case EOpTranspose:
1052 			if(visit == PostVisit)
1053 			{
1054 				int numCols = arg->getNominalSize();
1055 				int numRows = arg->getSecondarySize();
1056 				for(int i = 0; i < numCols; ++i)
1057 				{
1058 					for(int j = 0; j < numRows; ++j)
1059 					{
1060 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
1061 						mov->src[0].swizzle = 0x55 * j;
1062 						mov->dst.mask = 1 << i;
1063 					}
1064 				}
1065 			}
1066 			break;
1067 		case EOpDeterminant:
1068 			if(visit == PostVisit)
1069 			{
1070 				int size = arg->getNominalSize();
1071 				ASSERT(size == arg->getSecondarySize());
1072 
1073 				emitDeterminant(result, arg, size);
1074 			}
1075 			break;
1076 		case EOpInverse:
1077 			if(visit == PostVisit)
1078 			{
1079 				int size = arg->getNominalSize();
1080 				ASSERT(size == arg->getSecondarySize());
1081 
1082 				// Compute transposed matrix of cofactors
1083 				for(int i = 0; i < size; ++i)
1084 				{
1085 					for(int j = 0; j < size; ++j)
1086 					{
1087 						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1088 						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
1089 						emitDeterminant(result, arg, size - 1, j, i, i, j);
1090 					}
1091 				}
1092 
1093 				// Compute 1 / determinant
1094 				Temporary invDet(this);
1095 				emitDeterminant(&invDet, arg, size);
1096 				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
1097 				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
1098 				div->src[1].swizzle = 0x00; // xxxx
1099 
1100 				// Divide transposed matrix of cofactors by determinant
1101 				for(int i = 0; i < size; ++i)
1102 				{
1103 					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
1104 				}
1105 			}
1106 			break;
1107 		default: UNREACHABLE(node->getOp());
1108 		}
1109 
1110 		return true;
1111 	}
1112 
visitAggregate(Visit visit,TIntermAggregate * node)1113 	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
1114 	{
1115 		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
1116 		{
1117 			return false;
1118 		}
1119 
1120 		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);
1121 
1122 		TIntermTyped *result = node;
1123 		const TType &resultType = node->getType();
1124 		TIntermSequence &arg = node->getSequence();
1125 		size_t argumentCount = arg.size();
1126 
1127 		switch(node->getOp())
1128 		{
1129 		case EOpSequence:             break;
1130 		case EOpDeclaration:          break;
1131 		case EOpInvariantDeclaration: break;
1132 		case EOpPrototype:            break;
1133 		case EOpComma:
1134 			if(visit == PostVisit)
1135 			{
1136 				copy(result, arg[1]);
1137 			}
1138 			break;
1139 		case EOpFunction:
1140 			if(visit == PreVisit)
1141 			{
1142 				const TString &name = node->getName();
1143 
1144 				if(emitScope == FUNCTION)
1145 				{
1146 					if(functionArray.size() > 1)   // No need for a label when there's only main()
1147 					{
1148 						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
1149 						label->dst.type = sw::Shader::PARAMETER_LABEL;
1150 
1151 						const Function *function = findFunction(name);
1152 						ASSERT(function);   // Should have been added during global pass
1153 						label->dst.index = function->label;
1154 						currentFunction = function->label;
1155 					}
1156 				}
1157 				else if(emitScope == GLOBAL)
1158 				{
1159 					if(name != "main(")
1160 					{
1161 						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
1162 						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
1163 					}
1164 				}
1165 				else UNREACHABLE(emitScope);
1166 
1167 				currentScope = FUNCTION;
1168 			}
1169 			else if(visit == PostVisit)
1170 			{
1171 				if(emitScope == FUNCTION)
1172 				{
1173 					if(functionArray.size() > 1)   // No need to return when there's only main()
1174 					{
1175 						emit(sw::Shader::OPCODE_RET);
1176 					}
1177 				}
1178 
1179 				currentScope = GLOBAL;
1180 			}
1181 			break;
1182 		case EOpFunctionCall:
1183 			if(visit == PostVisit)
1184 			{
1185 				if(node->isUserDefined())
1186 				{
1187 					const TString &name = node->getName();
1188 					const Function *function = findFunction(name);
1189 
1190 					if(!function)
1191 					{
1192 						mContext.error(node->getLine(), "function definition not found", name.c_str());
1193 						return false;
1194 					}
1195 
1196 					TIntermSequence &arguments = *function->arg;
1197 
1198 					for(size_t i = 0; i < argumentCount; i++)
1199 					{
1200 						TIntermTyped *in = arguments[i]->getAsTyped();
1201 
1202 						if(in->getQualifier() == EvqIn ||
1203 						   in->getQualifier() == EvqInOut ||
1204 						   in->getQualifier() == EvqConstReadOnly)
1205 						{
1206 							copy(in, arg[i]);
1207 						}
1208 					}
1209 
1210 					Instruction *call = emit(sw::Shader::OPCODE_CALL);
1211 					call->dst.type = sw::Shader::PARAMETER_LABEL;
1212 					call->dst.index = function->label;
1213 
1214 					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
1215 					{
1216 						copy(result, function->ret);
1217 					}
1218 
1219 					for(size_t i = 0; i < argumentCount; i++)
1220 					{
1221 						TIntermTyped *argument = arguments[i]->getAsTyped();
1222 						TIntermTyped *out = arg[i]->getAsTyped();
1223 
1224 						if(argument->getQualifier() == EvqOut ||
1225 						   argument->getQualifier() == EvqInOut)
1226 						{
1227 							copy(out, argument);
1228 						}
1229 					}
1230 				}
1231 				else
1232 				{
1233 					const TextureFunction textureFunction(node->getName());
1234 					TIntermTyped *t = arg[1]->getAsTyped();
1235 
1236 					Temporary coord(this);
1237 
1238 					if(textureFunction.proj)
1239 					{
1240 						TIntermConstantUnion* constant = arg[1]->getAsConstantUnion();
1241 						if(constant)
1242 						{
1243 							float projFactor = 1.0f / constant->getFConst(t->getNominalSize() - 1);
1244 							Constant projCoord(constant->getFConst(0) * projFactor,
1245 							                   constant->getFConst(1) * projFactor,
1246 							                   constant->getFConst(2) * projFactor,
1247 							                   0.0f);
1248 							emit(sw::Shader::OPCODE_MOV, &coord, &projCoord);
1249 						}
1250 						else
1251 						{
1252 							Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
1253 							rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
1254 							rcp->dst.mask = 0x7;
1255 
1256 							Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
1257 							mul->dst.mask = 0x7;
1258 						}
1259 					}
1260 					else
1261 					{
1262 						emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);
1263 					}
1264 
1265 					switch(textureFunction.method)
1266 					{
1267 					case TextureFunction::IMPLICIT:
1268 						{
1269 							TIntermNode* offset = textureFunction.offset ? arg[2] : 0;
1270 
1271 							if(argumentCount == 2 || (textureFunction.offset && argumentCount == 3))
1272 							{
1273 								Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,
1274 								                        result, &coord, arg[0], offset);
1275 							}
1276 							else if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))   // bias
1277 							{
1278 								Instruction *bias = emit(sw::Shader::OPCODE_MOV, &coord, arg[textureFunction.offset ? 3 : 2]);
1279 								bias->dst.mask = 0x8;
1280 
1281 								Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,
1282 								                        result, &coord, arg[0], offset); // FIXME: Implement an efficient TEXLDB instruction
1283 								tex->bias = true;
1284 							}
1285 							else UNREACHABLE(argumentCount);
1286 						}
1287 						break;
1288 					case TextureFunction::LOD:
1289 						{
1290 							Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]);
1291 							lod->dst.mask = 0x8;
1292 
1293 							emit(textureFunction.offset ? sw::Shader::OPCODE_TEXLDLOFFSET : sw::Shader::OPCODE_TEXLDL,
1294 							     result, &coord, arg[0], textureFunction.offset ? arg[3] : nullptr);
1295 						}
1296 						break;
1297 					case TextureFunction::FETCH:
1298 						{
1299 							if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))
1300 							{
1301 								TIntermNode *offset = textureFunction.offset ? arg[3] : nullptr;
1302 
1303 								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXELFETCHOFFSET : sw::Shader::OPCODE_TEXELFETCH,
1304 								     result, arg[1], arg[0], arg[2], offset);
1305 							}
1306 							else UNREACHABLE(argumentCount);
1307 						}
1308 						break;
1309 					case TextureFunction::GRAD:
1310 						{
1311 							if(argumentCount == 4 || (textureFunction.offset && argumentCount == 5))
1312 							{
1313 								TIntermNode *offset = textureFunction.offset ? arg[4] : nullptr;
1314 
1315 								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXGRADOFFSET : sw::Shader::OPCODE_TEXGRAD,
1316 								     result, &coord, arg[0], arg[2], arg[3], offset);
1317 							}
1318 							else UNREACHABLE(argumentCount);
1319 						}
1320 						break;
1321 					case TextureFunction::SIZE:
1322 						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], arg[0]);
1323 						break;
1324 					default:
1325 						UNREACHABLE(textureFunction.method);
1326 					}
1327 				}
1328 			}
1329 			break;
1330 		case EOpParameters:
1331 			break;
1332 		case EOpConstructFloat:
1333 		case EOpConstructVec2:
1334 		case EOpConstructVec3:
1335 		case EOpConstructVec4:
1336 		case EOpConstructBool:
1337 		case EOpConstructBVec2:
1338 		case EOpConstructBVec3:
1339 		case EOpConstructBVec4:
1340 		case EOpConstructInt:
1341 		case EOpConstructIVec2:
1342 		case EOpConstructIVec3:
1343 		case EOpConstructIVec4:
1344 		case EOpConstructUInt:
1345 		case EOpConstructUVec2:
1346 		case EOpConstructUVec3:
1347 		case EOpConstructUVec4:
1348 			if(visit == PostVisit)
1349 			{
1350 				int component = 0;
1351 
1352 				for(size_t i = 0; i < argumentCount; i++)
1353 				{
1354 					TIntermTyped *argi = arg[i]->getAsTyped();
1355 					int size = argi->getNominalSize();
1356 
1357 					if(!argi->isMatrix())
1358 					{
1359 						Instruction *mov = emitCast(result, argi);
1360 						mov->dst.mask = (0xF << component) & 0xF;
1361 						mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);
1362 
1363 						component += size;
1364 					}
1365 					else   // Matrix
1366 					{
1367 						int column = 0;
1368 
1369 						while(component < resultType.getNominalSize())
1370 						{
1371 							Instruction *mov = emitCast(result, 0, argi, column);
1372 							mov->dst.mask = (0xF << component) & 0xF;
1373 							mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);
1374 
1375 							column++;
1376 							component += size;
1377 						}
1378 					}
1379 				}
1380 			}
1381 			break;
1382 		case EOpConstructMat2:
1383 		case EOpConstructMat2x3:
1384 		case EOpConstructMat2x4:
1385 		case EOpConstructMat3x2:
1386 		case EOpConstructMat3:
1387 		case EOpConstructMat3x4:
1388 		case EOpConstructMat4x2:
1389 		case EOpConstructMat4x3:
1390 		case EOpConstructMat4:
1391 			if(visit == PostVisit)
1392 			{
1393 				TIntermTyped *arg0 = arg[0]->getAsTyped();
1394 				const int outCols = result->getNominalSize();
1395 				const int outRows = result->getSecondarySize();
1396 
1397 				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
1398 				{
1399 					for(int i = 0; i < outCols; i++)
1400 					{
1401 						Instruction *init = emit(sw::Shader::OPCODE_MOV, result, i, &zero);
1402 						Instruction *mov = emitCast(result, i, arg0, 0);
1403 						mov->dst.mask = 1 << i;
1404 						ASSERT(mov->src[0].swizzle == 0x00);
1405 					}
1406 				}
1407 				else if(arg0->isMatrix())
1408 				{
1409 					const int inCols = arg0->getNominalSize();
1410 					const int inRows = arg0->getSecondarySize();
1411 
1412 					for(int i = 0; i < outCols; i++)
1413 					{
1414 						if(i >= inCols || outRows > inRows)
1415 						{
1416 							// Initialize to identity matrix
1417 							Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
1418 							Instruction *mov = emitCast(result, i, &col, 0);
1419 						}
1420 
1421 						if(i < inCols)
1422 						{
1423 							Instruction *mov = emitCast(result, i, arg0, i);
1424 							mov->dst.mask = 0xF >> (4 - inRows);
1425 						}
1426 					}
1427 				}
1428 				else
1429 				{
1430 					int column = 0;
1431 					int row = 0;
1432 
1433 					for(size_t i = 0; i < argumentCount; i++)
1434 					{
1435 						TIntermTyped *argi = arg[i]->getAsTyped();
1436 						int size = argi->getNominalSize();
1437 						int element = 0;
1438 
1439 						while(element < size)
1440 						{
1441 							Instruction *mov = emitCast(result, column, argi, 0);
1442 							mov->dst.mask = (0xF << row) & 0xF;
1443 							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;
1444 
1445 							int end = row + size - element;
1446 							column = end >= outRows ? column + 1 : column;
1447 							element = element + outRows - row;
1448 							row = end >= outRows ? 0 : end;
1449 						}
1450 					}
1451 				}
1452 			}
1453 			break;
1454 		case EOpConstructStruct:
1455 			if(visit == PostVisit)
1456 			{
1457 				int offset = 0;
1458 				for(size_t i = 0; i < argumentCount; i++)
1459 				{
1460 					TIntermTyped *argi = arg[i]->getAsTyped();
1461 					int size = argi->totalRegisterCount();
1462 
1463 					for(int index = 0; index < size; index++)
1464 					{
1465 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
1466 						mov->dst.mask = writeMask(result, offset + index);
1467 					}
1468 
1469 					offset += size;
1470 				}
1471 			}
1472 			break;
1473 		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
1474 		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
1475 		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
1476 		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
1477 		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
1478 		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
1479 		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
1480 		case EOpModf:
1481 			if(visit == PostVisit)
1482 			{
1483 				TIntermTyped* arg1 = arg[1]->getAsTyped();
1484 				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
1485 				assignLvalue(arg1, arg1);
1486 				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
1487 			}
1488 			break;
1489 		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
1490 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
1491 		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
1492 		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
1493 		case EOpClamp:
1494 			if(visit == PostVisit)
1495 			{
1496 				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
1497 				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
1498 			}
1499 			break;
1500 		case EOpMix:         if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break;
1501 		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
1502 		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
1503 		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
1504 		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
1505 		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
1506 		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
1507 		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
1508 		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
1509 		case EOpMul:
1510 			if(visit == PostVisit)
1511 			{
1512 				TIntermTyped *arg0 = arg[0]->getAsTyped();
1513 				TIntermTyped *arg1 = arg[1]->getAsTyped();
1514 				ASSERT((arg0->getNominalSize() == arg1->getNominalSize()) && (arg0->getSecondarySize() == arg1->getSecondarySize()));
1515 
1516 				int size = arg0->getNominalSize();
1517 				for(int i = 0; i < size; i++)
1518 				{
1519 					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
1520 				}
1521 			}
1522 			break;
1523 		case EOpOuterProduct:
1524 			if(visit == PostVisit)
1525 			{
1526 				for(int i = 0; i < dim(arg[1]); i++)
1527 				{
1528 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
1529 					mul->src[1].swizzle = 0x55 * i;
1530 				}
1531 			}
1532 			break;
1533 		default: UNREACHABLE(node->getOp());
1534 		}
1535 
1536 		return true;
1537 	}
1538 
visitSelection(Visit visit,TIntermSelection * node)1539 	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
1540 	{
1541 		if(currentScope != emitScope)
1542 		{
1543 			return false;
1544 		}
1545 
1546 		TIntermTyped *condition = node->getCondition();
1547 		TIntermNode *trueBlock = node->getTrueBlock();
1548 		TIntermNode *falseBlock = node->getFalseBlock();
1549 		TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
1550 
1551 		condition->traverse(this);
1552 
1553 		if(node->usesTernaryOperator())
1554 		{
1555 			if(constantCondition)
1556 			{
1557 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1558 
1559 				if(trueCondition)
1560 				{
1561 					trueBlock->traverse(this);
1562 					copy(node, trueBlock);
1563 				}
1564 				else
1565 				{
1566 					falseBlock->traverse(this);
1567 					copy(node, falseBlock);
1568 				}
1569 			}
1570 			else if(trivial(node, 6))   // Fast to compute both potential results and no side effects
1571 			{
1572 				trueBlock->traverse(this);
1573 				falseBlock->traverse(this);
1574 				emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);
1575 			}
1576 			else
1577 			{
1578 				emit(sw::Shader::OPCODE_IF, 0, condition);
1579 
1580 				if(trueBlock)
1581 				{
1582 					trueBlock->traverse(this);
1583 					copy(node, trueBlock);
1584 				}
1585 
1586 				if(falseBlock)
1587 				{
1588 					emit(sw::Shader::OPCODE_ELSE);
1589 					falseBlock->traverse(this);
1590 					copy(node, falseBlock);
1591 				}
1592 
1593 				emit(sw::Shader::OPCODE_ENDIF);
1594 			}
1595 		}
1596 		else  // if/else statement
1597 		{
1598 			if(constantCondition)
1599 			{
1600 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1601 
1602 				if(trueCondition)
1603 				{
1604 					if(trueBlock)
1605 					{
1606 						trueBlock->traverse(this);
1607 					}
1608 				}
1609 				else
1610 				{
1611 					if(falseBlock)
1612 					{
1613 						falseBlock->traverse(this);
1614 					}
1615 				}
1616 			}
1617 			else
1618 			{
1619 				emit(sw::Shader::OPCODE_IF, 0, condition);
1620 
1621 				if(trueBlock)
1622 				{
1623 					trueBlock->traverse(this);
1624 				}
1625 
1626 				if(falseBlock)
1627 				{
1628 					emit(sw::Shader::OPCODE_ELSE);
1629 					falseBlock->traverse(this);
1630 				}
1631 
1632 				emit(sw::Shader::OPCODE_ENDIF);
1633 			}
1634 		}
1635 
1636 		return false;
1637 	}
1638 
visitLoop(Visit visit,TIntermLoop * node)1639 	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
1640 	{
1641 		if(currentScope != emitScope)
1642 		{
1643 			return false;
1644 		}
1645 
1646 		unsigned int iterations = loopCount(node);
1647 
1648 		if(iterations == 0)
1649 		{
1650 			return false;
1651 		}
1652 
1653 		bool unroll = (iterations <= 4);
1654 
1655 		if(unroll)
1656 		{
1657 			LoopUnrollable loopUnrollable;
1658 			unroll = loopUnrollable.traverse(node);
1659 		}
1660 
1661 		TIntermNode *init = node->getInit();
1662 		TIntermTyped *condition = node->getCondition();
1663 		TIntermTyped *expression = node->getExpression();
1664 		TIntermNode *body = node->getBody();
1665 		Constant True(true);
1666 
1667 		if(node->getType() == ELoopDoWhile)
1668 		{
1669 			Temporary iterate(this);
1670 			emit(sw::Shader::OPCODE_MOV, &iterate, &True);
1671 
1672 			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while
1673 
1674 			if(body)
1675 			{
1676 				body->traverse(this);
1677 			}
1678 
1679 			emit(sw::Shader::OPCODE_TEST);
1680 
1681 			condition->traverse(this);
1682 			emit(sw::Shader::OPCODE_MOV, &iterate, condition);
1683 
1684 			emit(sw::Shader::OPCODE_ENDWHILE);
1685 		}
1686 		else
1687 		{
1688 			if(init)
1689 			{
1690 				init->traverse(this);
1691 			}
1692 
1693 			if(unroll)
1694 			{
1695 				for(unsigned int i = 0; i < iterations; i++)
1696 				{
1697 				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop
1698 
1699 					if(body)
1700 					{
1701 						body->traverse(this);
1702 					}
1703 
1704 					if(expression)
1705 					{
1706 						expression->traverse(this);
1707 					}
1708 				}
1709 			}
1710 			else
1711 			{
1712 				if(condition)
1713 				{
1714 					condition->traverse(this);
1715 				}
1716 				else
1717 				{
1718 					condition = &True;
1719 				}
1720 
1721 				emit(sw::Shader::OPCODE_WHILE, 0, condition);
1722 
1723 				if(body)
1724 				{
1725 					body->traverse(this);
1726 				}
1727 
1728 				emit(sw::Shader::OPCODE_TEST);
1729 
1730 				if(expression)
1731 				{
1732 					expression->traverse(this);
1733 				}
1734 
1735 				if(condition)
1736 				{
1737 					condition->traverse(this);
1738 				}
1739 
1740 				emit(sw::Shader::OPCODE_ENDWHILE);
1741 			}
1742 		}
1743 
1744 		return false;
1745 	}
1746 
visitBranch(Visit visit,TIntermBranch * node)1747 	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
1748 	{
1749 		if(currentScope != emitScope)
1750 		{
1751 			return false;
1752 		}
1753 
1754 		switch(node->getFlowOp())
1755 		{
1756 		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
1757 		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
1758 		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
1759 		case EOpReturn:
1760 			if(visit == PostVisit)
1761 			{
1762 				TIntermTyped *value = node->getExpression();
1763 
1764 				if(value)
1765 				{
1766 					copy(functionArray[currentFunction].ret, value);
1767 				}
1768 
1769 				emit(sw::Shader::OPCODE_LEAVE);
1770 			}
1771 			break;
1772 		default: UNREACHABLE(node->getFlowOp());
1773 		}
1774 
1775 		return true;
1776 	}
1777 
visitSwitch(Visit visit,TIntermSwitch * node)1778 	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
1779 	{
1780 		if(currentScope != emitScope)
1781 		{
1782 			return false;
1783 		}
1784 
1785 		TIntermTyped* switchValue = node->getInit();
1786 		TIntermAggregate* opList = node->getStatementList();
1787 
1788 		if(!switchValue || !opList)
1789 		{
1790 			return false;
1791 		}
1792 
1793 		switchValue->traverse(this);
1794 
1795 		emit(sw::Shader::OPCODE_SWITCH);
1796 
1797 		TIntermSequence& sequence = opList->getSequence();
1798 		TIntermSequence::iterator it = sequence.begin();
1799 		TIntermSequence::iterator defaultIt = sequence.end();
1800 		int nbCases = 0;
1801 		for(; it != sequence.end(); ++it)
1802 		{
1803 			TIntermCase* currentCase = (*it)->getAsCaseNode();
1804 			if(currentCase)
1805 			{
1806 				TIntermSequence::iterator caseIt = it;
1807 
1808 				TIntermTyped* condition = currentCase->getCondition();
1809 				if(condition) // non default case
1810 				{
1811 					if(nbCases != 0)
1812 					{
1813 						emit(sw::Shader::OPCODE_ELSE);
1814 					}
1815 
1816 					condition->traverse(this);
1817 					Temporary result(this);
1818 					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
1819 					emit(sw::Shader::OPCODE_IF, 0, &result);
1820 					nbCases++;
1821 
1822 					for(++caseIt; caseIt != sequence.end(); ++caseIt)
1823 					{
1824 						(*caseIt)->traverse(this);
1825 						if((*caseIt)->getAsBranchNode()) // Kill, Break, Continue or Return
1826 						{
1827 							break;
1828 						}
1829 					}
1830 				}
1831 				else
1832 				{
1833 					defaultIt = it; // The default case might not be the last case, keep it for last
1834 				}
1835 			}
1836 		}
1837 
1838 		// If there's a default case, traverse it here
1839 		if(defaultIt != sequence.end())
1840 		{
1841 			emit(sw::Shader::OPCODE_ELSE);
1842 			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
1843 			{
1844 				(*defaultIt)->traverse(this);
1845 				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
1846 				{
1847 					break;
1848 				}
1849 			}
1850 		}
1851 
1852 		for(int i = 0; i < nbCases; ++i)
1853 		{
1854 			emit(sw::Shader::OPCODE_ENDIF);
1855 		}
1856 
1857 		emit(sw::Shader::OPCODE_ENDSWITCH);
1858 
1859 		return false;
1860 	}
1861 
emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)1862 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
1863 	{
1864 		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
1865 	}
1866 
emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)1867 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
1868 	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
1869 	{
1870 		Instruction *instruction = new Instruction(op);
1871 
1872 		if(dst)
1873 		{
1874 			instruction->dst.type = registerType(dst);
1875 			instruction->dst.index = registerIndex(dst) + dstIndex;
1876 			instruction->dst.mask = writeMask(dst);
1877 			instruction->dst.integer = (dst->getBasicType() == EbtInt);
1878 		}
1879 
1880 		argument(instruction->src[0], src0, index0);
1881 		argument(instruction->src[1], src1, index1);
1882 		argument(instruction->src[2], src2, index2);
1883 		argument(instruction->src[3], src3, index3);
1884 		argument(instruction->src[4], src4, index4);
1885 
1886 		shader->append(instruction);
1887 
1888 		return instruction;
1889 	}
1890 
emitCast(TIntermTyped * dst,TIntermTyped * src)1891 	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
1892 	{
1893 		return emitCast(dst, 0, src, 0);
1894 	}
1895 
emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)1896 	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
1897 	{
1898 		switch(src->getBasicType())
1899 		{
1900 		case EbtBool:
1901 			switch(dst->getBasicType())
1902 			{
1903 			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
1904 			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
1905 			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
1906 			default:       break;
1907 			}
1908 			break;
1909 		case EbtInt:
1910 			switch(dst->getBasicType())
1911 			{
1912 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
1913 			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
1914 			default:       break;
1915 			}
1916 			break;
1917 		case EbtUInt:
1918 			switch(dst->getBasicType())
1919 			{
1920 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
1921 			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
1922 			default:       break;
1923 			}
1924 			break;
1925 		case EbtFloat:
1926 			switch(dst->getBasicType())
1927 			{
1928 			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
1929 			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
1930 			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
1931 			default:      break;
1932 			}
1933 			break;
1934 		default:
1935 			break;
1936 		}
1937 
1938 		ASSERT((src->getBasicType() == dst->getBasicType()) ||
1939 		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
1940 		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
1941 
1942 		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
1943 	}
1944 
emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)1945 	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
1946 	{
1947 		for(int index = 0; index < dst->elementRegisterCount(); index++)
1948 		{
1949 			emit(op, dst, index, src0, index, src1, index, src2, index);
1950 		}
1951 	}
1952 
	// Emits a compound assignment: computes 'op' on 'src0' and 'src1' into 'result' (one
	// instruction per element register, see emitBinary), then stores 'result' into the
	// l-value 'lhs' through assignLvalue.
	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
	{
		emitBinary(op, result, src0, src1);
		assignLvalue(lhs, result);
	}
1958 
emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)1959 	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
1960 	{
1961 		sw::Shader::Opcode opcode;
1962 		switch(left->getAsTyped()->getBasicType())
1963 		{
1964 		case EbtBool:
1965 		case EbtInt:
1966 			opcode = sw::Shader::OPCODE_ICMP;
1967 			break;
1968 		case EbtUInt:
1969 			opcode = sw::Shader::OPCODE_UCMP;
1970 			break;
1971 		default:
1972 			opcode = sw::Shader::OPCODE_CMP;
1973 			break;
1974 		}
1975 
1976 		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
1977 		cmp->control = cmpOp;
1978 	}
1979 
componentCount(const TType & type,int registers)1980 	int componentCount(const TType &type, int registers)
1981 	{
1982 		if(registers == 0)
1983 		{
1984 			return 0;
1985 		}
1986 
1987 		if(type.isArray() && registers >= type.elementRegisterCount())
1988 		{
1989 			int index = registers / type.elementRegisterCount();
1990 			registers -= index * type.elementRegisterCount();
1991 			return index * type.getElementSize() + componentCount(type, registers);
1992 		}
1993 
1994 		if(type.isStruct() || type.isInterfaceBlock())
1995 		{
1996 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
1997 			int elements = 0;
1998 
1999 			for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)
2000 			{
2001 				const TType &fieldType = *((*field)->type());
2002 
2003 				if(fieldType.totalRegisterCount() <= registers)
2004 				{
2005 					registers -= fieldType.totalRegisterCount();
2006 					elements += fieldType.getObjectSize();
2007 				}
2008 				else   // Register within this field
2009 				{
2010 					return elements + componentCount(fieldType, registers);
2011 				}
2012 			}
2013 		}
2014 		else if(type.isMatrix())
2015 		{
2016 			return registers * type.registerSize();
2017 		}
2018 
2019 		UNREACHABLE(0);
2020 		return 0;
2021 	}
2022 
registerSize(const TType & type,int registers)2023 	int registerSize(const TType &type, int registers)
2024 	{
2025 		if(registers == 0)
2026 		{
2027 			if(type.isStruct())
2028 			{
2029 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
2030 			}
2031 			else if(type.isInterfaceBlock())
2032 			{
2033 				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
2034 			}
2035 
2036 			return type.registerSize();
2037 		}
2038 
2039 		if(type.isArray() && registers >= type.elementRegisterCount())
2040 		{
2041 			int index = registers / type.elementRegisterCount();
2042 			registers -= index * type.elementRegisterCount();
2043 			return registerSize(type, registers);
2044 		}
2045 
2046 		if(type.isStruct() || type.isInterfaceBlock())
2047 		{
2048 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2049 			int elements = 0;
2050 
2051 			for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)
2052 			{
2053 				const TType &fieldType = *((*field)->type());
2054 
2055 				if(fieldType.totalRegisterCount() <= registers)
2056 				{
2057 					registers -= fieldType.totalRegisterCount();
2058 					elements += fieldType.getObjectSize();
2059 				}
2060 				else   // Register within this field
2061 				{
2062 					return registerSize(fieldType, registers);
2063 				}
2064 			}
2065 		}
2066 		else if(type.isMatrix())
2067 		{
2068 			return registerSize(type, 0);
2069 		}
2070 
2071 		UNREACHABLE(0);
2072 		return 0;
2073 	}
2074 
getBlockId(TIntermTyped * arg)2075 	int OutputASM::getBlockId(TIntermTyped *arg)
2076 	{
2077 		if(arg)
2078 		{
2079 			const TType &type = arg->getType();
2080 			TInterfaceBlock* block = type.getInterfaceBlock();
2081 			if(block && (type.getQualifier() == EvqUniform))
2082 			{
2083 				// Make sure the uniform block is declared
2084 				uniformRegister(arg);
2085 
2086 				const char* blockName = block->name().c_str();
2087 
2088 				// Fetch uniform block index from array of blocks
2089 				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
2090 				{
2091 					if(blockName == it->name)
2092 					{
2093 						return it->blockId;
2094 					}
2095 				}
2096 
2097 				ASSERT(false);
2098 			}
2099 		}
2100 
2101 		return -1;
2102 	}
2103 
getArgumentInfo(TIntermTyped * arg,int index)2104 	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
2105 	{
2106 		const TType &type = arg->getType();
2107 		int blockId = getBlockId(arg);
2108 		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
2109 		if(blockId != -1)
2110 		{
2111 			argumentInfo.bufferIndex = 0;
2112 			for(int i = 0; i < blockId; ++i)
2113 			{
2114 				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
2115 				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
2116 			}
2117 
2118 			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];
2119 
2120 			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
2121 			BlockDefinitionIndexMap::const_iterator it = itEnd;
2122 
2123 			argumentInfo.clampedIndex = index;
2124 			if(type.isInterfaceBlock())
2125 			{
2126 				// Offset index to the beginning of the selected instance
2127 				int blockRegisters = type.elementRegisterCount();
2128 				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
2129 				argumentInfo.bufferIndex += bufferOffset;
2130 				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
2131 			}
2132 
2133 			int regIndex = registerIndex(arg);
2134 			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
2135 			{
2136 				it = blockDefinition.find(i);
2137 				if(it != itEnd)
2138 				{
2139 					argumentInfo.clampedIndex -= (i - regIndex);
2140 					break;
2141 				}
2142 			}
2143 			ASSERT(it != itEnd);
2144 
2145 			argumentInfo.typedMemberInfo = it->second;
2146 
2147 			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
2148 			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
2149 		}
2150 		else
2151 		{
2152 			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
2153 		}
2154 
2155 		return argumentInfo;
2156 	}
2157 
argument(sw::Shader::SourceParameter & parameter,TIntermNode * argument,int index)2158 	void OutputASM::argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
2159 	{
2160 		if(argument)
2161 		{
2162 			TIntermTyped *arg = argument->getAsTyped();
2163 			Temporary unpackedUniform(this);
2164 
2165 			const TType& srcType = arg->getType();
2166 			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
2167 			if(srcBlock && (srcType.getQualifier() == EvqUniform))
2168 			{
2169 				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
2170 				const TType &memberType = argumentInfo.typedMemberInfo.type;
2171 
2172 				if(memberType.getBasicType() == EbtBool)
2173 				{
2174 					int arraySize = (memberType.isArray() ? memberType.getArraySize() : 1);
2175 					ASSERT(argumentInfo.clampedIndex < arraySize);
2176 
2177 					// Convert the packed bool, which is currently an int, to a true bool
2178 					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
2179 					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
2180 					instruction->dst.index = registerIndex(&unpackedUniform);
2181 					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
2182 					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
2183 					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;
2184 
2185 					shader->append(instruction);
2186 
2187 					arg = &unpackedUniform;
2188 					index = 0;
2189 				}
2190 				else if((srcBlock->matrixPacking() == EmpRowMajor) && memberType.isMatrix())
2191 				{
2192 					int numCols = memberType.getNominalSize();
2193 					int numRows = memberType.getSecondarySize();
2194 					int arraySize = (memberType.isArray() ? memberType.getArraySize() : 1);
2195 
2196 					ASSERT(argumentInfo.clampedIndex < (numCols * arraySize));
2197 
2198 					unsigned int dstIndex = registerIndex(&unpackedUniform);
2199 					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;
2200 					int arrayIndex = argumentInfo.clampedIndex / numCols;
2201 					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;
2202 
2203 					for(int j = 0; j < numRows; ++j)
2204 					{
2205 						// Transpose the row major matrix
2206 						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
2207 						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
2208 						instruction->dst.index = dstIndex;
2209 						instruction->dst.mask = 1 << j;
2210 						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
2211 						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
2212 						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
2213 						instruction->src[0].swizzle = srcSwizzle;
2214 
2215 						shader->append(instruction);
2216 					}
2217 
2218 					arg = &unpackedUniform;
2219 					index = 0;
2220 				}
2221 			}
2222 
2223 			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
2224 			const TType &type = argumentInfo.typedMemberInfo.type;
2225 
2226 			int size = registerSize(type, argumentInfo.clampedIndex);
2227 
2228 			parameter.type = registerType(arg);
2229 			parameter.bufferIndex = argumentInfo.bufferIndex;
2230 
2231 			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
2232 			{
2233 				int component = componentCount(type, argumentInfo.clampedIndex);
2234 				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();
2235 
2236 				for(int i = 0; i < 4; i++)
2237 				{
2238 					if(size == 1)   // Replicate
2239 					{
2240 						parameter.value[i] = constants[component + 0].getAsFloat();
2241 					}
2242 					else if(i < size)
2243 					{
2244 						parameter.value[i] = constants[component + i].getAsFloat();
2245 					}
2246 					else
2247 					{
2248 						parameter.value[i] = 0.0f;
2249 					}
2250 				}
2251 			}
2252 			else
2253 			{
2254 				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;
2255 
2256 				if(parameter.bufferIndex != -1)
2257 				{
2258 					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
2259 					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
2260 				}
2261 			}
2262 
2263 			if(!IsSampler(arg->getBasicType()))
2264 			{
2265 				parameter.swizzle = readSwizzle(arg, size);
2266 			}
2267 		}
2268 	}
2269 
copy(TIntermTyped * dst,TIntermNode * src,int offset)2270 	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
2271 	{
2272 		for(int index = 0; index < dst->totalRegisterCount(); index++)
2273 		{
2274 			Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
2275 			mov->dst.mask = writeMask(dst, index);
2276 		}
2277 	}
2278 
swizzleElement(int swizzle,int index)2279 	int swizzleElement(int swizzle, int index)
2280 	{
2281 		return (swizzle >> (index * 2)) & 0x03;
2282 	}
2283 
swizzleSwizzle(int leftSwizzle,int rightSwizzle)2284 	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
2285 	{
2286 		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
2287 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
2288 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
2289 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
2290 	}
2291 
assignLvalue(TIntermTyped * dst,TIntermTyped * src)2292 	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
2293 	{
2294 		if(src &&
2295 			((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
2296 			 (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))))
2297 		{
2298 			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
2299 		}
2300 
2301 		TIntermBinary *binary = dst->getAsBinaryNode();
2302 
2303 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
2304 		{
2305 			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);
2306 
2307 			Temporary address(this);
2308 			lvalue(insert->dst, address, dst);
2309 
2310 			insert->src[0].type = insert->dst.type;
2311 			insert->src[0].index = insert->dst.index;
2312 			insert->src[0].rel = insert->dst.rel;
2313 			argument(insert->src[1], src);
2314 			argument(insert->src[2], binary->getRight());
2315 
2316 			shader->append(insert);
2317 		}
2318 		else
2319 		{
2320 			for(int offset = 0; offset < dst->totalRegisterCount(); offset++)
2321 			{
2322 				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);
2323 
2324 				Temporary address(this);
2325 				int swizzle = lvalue(mov->dst, address, dst);
2326 				mov->dst.index += offset;
2327 
2328 				if(offset > 0)
2329 				{
2330 					mov->dst.mask = writeMask(dst, offset);
2331 				}
2332 
2333 				argument(mov->src[0], src, offset);
2334 				mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle);
2335 
2336 				shader->append(mov);
2337 			}
2338 		}
2339 	}
2340 
lvalue(sw::Shader::DestinationParameter & dst,Temporary & address,TIntermTyped * node)2341 	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node)
2342 	{
2343 		TIntermTyped *result = node;
2344 		TIntermBinary *binary = node->getAsBinaryNode();
2345 		TIntermSymbol *symbol = node->getAsSymbolNode();
2346 
2347 		if(binary)
2348 		{
2349 			TIntermTyped *left = binary->getLeft();
2350 			TIntermTyped *right = binary->getRight();
2351 
2352 			int leftSwizzle = lvalue(dst, address, left);   // Resolve the l-value of the left side
2353 
2354 			switch(binary->getOp())
2355 			{
2356 			case EOpIndexDirect:
2357 				{
2358 					int rightIndex = right->getAsConstantUnion()->getIConst(0);
2359 
2360 					if(left->isRegister())
2361 					{
2362 						int leftMask = dst.mask;
2363 
2364 						dst.mask = 1;
2365 						while((leftMask & dst.mask) == 0)
2366 						{
2367 							dst.mask = dst.mask << 1;
2368 						}
2369 
2370 						int element = swizzleElement(leftSwizzle, rightIndex);
2371 						dst.mask = 1 << element;
2372 
2373 						return element;
2374 					}
2375 					else if(left->isArray() || left->isMatrix())
2376 					{
2377 						dst.index += rightIndex * result->totalRegisterCount();
2378 						return 0xE4;
2379 					}
2380 					else UNREACHABLE(0);
2381 				}
2382 				break;
2383 			case EOpIndexIndirect:
2384 				{
2385 					if(left->isRegister())
2386 					{
2387 						// Requires INSERT instruction (handled by calling function)
2388 					}
2389 					else if(left->isArray() || left->isMatrix())
2390 					{
2391 						int scale = result->totalRegisterCount();
2392 
2393 						if(dst.rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
2394 						{
2395 							if(left->totalRegisterCount() > 1)
2396 							{
2397 								sw::Shader::SourceParameter relativeRegister;
2398 								argument(relativeRegister, right);
2399 
2400 								dst.rel.index = relativeRegister.index;
2401 								dst.rel.type = relativeRegister.type;
2402 								dst.rel.scale = scale;
2403 								dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
2404 							}
2405 						}
2406 						else if(dst.rel.index != registerIndex(&address))   // Move the previous index register to the address register
2407 						{
2408 							if(scale == 1)
2409 							{
2410 								Constant oldScale((int)dst.rel.scale);
2411 								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
2412 								mad->src[0].index = dst.rel.index;
2413 								mad->src[0].type = dst.rel.type;
2414 							}
2415 							else
2416 							{
2417 								Constant oldScale((int)dst.rel.scale);
2418 								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
2419 								mul->src[0].index = dst.rel.index;
2420 								mul->src[0].type = dst.rel.type;
2421 
2422 								Constant newScale(scale);
2423 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2424 							}
2425 
2426 							dst.rel.type = sw::Shader::PARAMETER_TEMP;
2427 							dst.rel.index = registerIndex(&address);
2428 							dst.rel.scale = 1;
2429 						}
2430 						else   // Just add the new index to the address register
2431 						{
2432 							if(scale == 1)
2433 							{
2434 								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
2435 							}
2436 							else
2437 							{
2438 								Constant newScale(scale);
2439 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2440 							}
2441 						}
2442 					}
2443 					else UNREACHABLE(0);
2444 				}
2445 				break;
2446 			case EOpIndexDirectStruct:
2447 			case EOpIndexDirectInterfaceBlock:
2448 				{
2449 					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
2450 					                           left->getType().getStruct()->fields() :
2451 					                           left->getType().getInterfaceBlock()->fields();
2452 					int index = right->getAsConstantUnion()->getIConst(0);
2453 					int fieldOffset = 0;
2454 
2455 					for(int i = 0; i < index; i++)
2456 					{
2457 						fieldOffset += fields[i]->type()->totalRegisterCount();
2458 					}
2459 
2460 					dst.type = registerType(left);
2461 					dst.index += fieldOffset;
2462 					dst.mask = writeMask(right);
2463 
2464 					return 0xE4;
2465 				}
2466 				break;
2467 			case EOpVectorSwizzle:
2468 				{
2469 					ASSERT(left->isRegister());
2470 
2471 					int leftMask = dst.mask;
2472 
2473 					int swizzle = 0;
2474 					int rightMask = 0;
2475 
2476 					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
2477 
2478 					for(unsigned int i = 0; i < sequence.size(); i++)
2479 					{
2480 						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
2481 
2482 						int element = swizzleElement(leftSwizzle, index);
2483 						rightMask = rightMask | (1 << element);
2484 						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
2485 					}
2486 
2487 					dst.mask = leftMask & rightMask;
2488 
2489 					return swizzle;
2490 				}
2491 				break;
2492 			default:
2493 				UNREACHABLE(binary->getOp());   // Not an l-value operator
2494 				break;
2495 			}
2496 		}
2497 		else if(symbol)
2498 		{
2499 			dst.type = registerType(symbol);
2500 			dst.index = registerIndex(symbol);
2501 			dst.mask = writeMask(symbol);
2502 			return 0xE4;
2503 		}
2504 
2505 		return 0xE4;
2506 	}
2507 
registerType(TIntermTyped * operand)2508 	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
2509 	{
2510 		if(isSamplerRegister(operand))
2511 		{
2512 			return sw::Shader::PARAMETER_SAMPLER;
2513 		}
2514 
2515 		const TQualifier qualifier = operand->getQualifier();
2516 		if((EvqFragColor == qualifier) || (EvqFragData == qualifier))
2517 		{
2518 			if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) ||
2519 			   ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier)))
2520 			{
2521 				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
2522 			}
2523 			outputQualifier = qualifier;
2524 		}
2525 
2526 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
2527 		{
2528 			return sw::Shader::PARAMETER_TEMP;
2529 		}
2530 
2531 		switch(qualifier)
2532 		{
2533 		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
2534 		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
2535 		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
2536 		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
2537 		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
2538 		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
2539 		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
2540 		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
2541 		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
2542 		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
2543 		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
2544 		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
2545 		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
2546 		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
2547 		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
2548 		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
2549 		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
2550 		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
2551 		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
2552 		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
2553 		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
2554 		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
2555 		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
2556 		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
2557 		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
2558 		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
2559 		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
2560 		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
2561 		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
2562 		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
2563 		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
2564 		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
2565 		default: UNREACHABLE(qualifier);
2566 		}
2567 
2568 		return sw::Shader::PARAMETER_VOID;
2569 	}
2570 
registerIndex(TIntermTyped * operand)2571 	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
2572 	{
2573 		if(isSamplerRegister(operand))
2574 		{
2575 			return samplerRegister(operand);
2576 		}
2577 
2578 		switch(operand->getQualifier())
2579 		{
2580 		case EvqTemporary:           return temporaryRegister(operand);
2581 		case EvqGlobal:              return temporaryRegister(operand);
2582 		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
2583 		case EvqAttribute:           return attributeRegister(operand);
2584 		case EvqVaryingIn:           return varyingRegister(operand);
2585 		case EvqVaryingOut:          return varyingRegister(operand);
2586 		case EvqVertexIn:            return attributeRegister(operand);
2587 		case EvqFragmentOut:         return fragmentOutputRegister(operand);
2588 		case EvqVertexOut:           return varyingRegister(operand);
2589 		case EvqFragmentIn:          return varyingRegister(operand);
2590 		case EvqInvariantVaryingIn:  return varyingRegister(operand);
2591 		case EvqInvariantVaryingOut: return varyingRegister(operand);
2592 		case EvqSmooth:              return varyingRegister(operand);
2593 		case EvqFlat:                return varyingRegister(operand);
2594 		case EvqCentroidOut:         return varyingRegister(operand);
2595 		case EvqSmoothIn:            return varyingRegister(operand);
2596 		case EvqFlatIn:              return varyingRegister(operand);
2597 		case EvqCentroidIn:          return varyingRegister(operand);
2598 		case EvqUniform:             return uniformRegister(operand);
2599 		case EvqIn:                  return temporaryRegister(operand);
2600 		case EvqOut:                 return temporaryRegister(operand);
2601 		case EvqInOut:               return temporaryRegister(operand);
2602 		case EvqConstReadOnly:       return temporaryRegister(operand);
2603 		case EvqPosition:            return varyingRegister(operand);
2604 		case EvqPointSize:           return varyingRegister(operand);
2605 		case EvqInstanceID:          vertexShader->instanceIdDeclared = true; return 0;
2606 		case EvqFragCoord:           pixelShader->vPosDeclared = true;  return 0;
2607 		case EvqFrontFacing:         pixelShader->vFaceDeclared = true; return 1;
2608 		case EvqPointCoord:          return varyingRegister(operand);
2609 		case EvqFragColor:           return 0;
2610 		case EvqFragData:            return fragmentOutputRegister(operand);
2611 		case EvqFragDepth:           return 0;
2612 		default: UNREACHABLE(operand->getQualifier());
2613 		}
2614 
2615 		return 0;
2616 	}
2617 
writeMask(TIntermTyped * destination,int index)2618 	int OutputASM::writeMask(TIntermTyped *destination, int index)
2619 	{
2620 		if(destination->getQualifier() == EvqPointSize)
2621 		{
2622 			return 0x2;   // Point size stored in the y component
2623 		}
2624 
2625 		return 0xF >> (4 - registerSize(destination->getType(), index));
2626 	}
2627 
readSwizzle(TIntermTyped * argument,int size)2628 	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
2629 	{
2630 		if(argument->getQualifier() == EvqPointSize)
2631 		{
2632 			return 0x55;   // Point size stored in the y component
2633 		}
2634 
2635 		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
2636 
2637 		return swizzleSize[size];
2638 	}
2639 
2640 	// Conservatively checks whether an expression is fast to compute and has no side effects
trivial(TIntermTyped * expression,int budget)2641 	bool OutputASM::trivial(TIntermTyped *expression, int budget)
2642 	{
2643 		if(!expression->isRegister())
2644 		{
2645 			return false;
2646 		}
2647 
2648 		return cost(expression, budget) >= 0;
2649 	}
2650 
2651 	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
cost(TIntermNode * expression,int budget)2652 	int OutputASM::cost(TIntermNode *expression, int budget)
2653 	{
2654 		if(budget < 0)
2655 		{
2656 			return budget;
2657 		}
2658 
2659 		if(expression->getAsSymbolNode())
2660 		{
2661 			return budget;
2662 		}
2663 		else if(expression->getAsConstantUnion())
2664 		{
2665 			return budget;
2666 		}
2667 		else if(expression->getAsBinaryNode())
2668 		{
2669 			TIntermBinary *binary = expression->getAsBinaryNode();
2670 
2671 			switch(binary->getOp())
2672 			{
2673 			case EOpVectorSwizzle:
2674 			case EOpIndexDirect:
2675 			case EOpIndexDirectStruct:
2676 			case EOpIndexDirectInterfaceBlock:
2677 				return cost(binary->getLeft(), budget - 0);
2678 			case EOpAdd:
2679 			case EOpSub:
2680 			case EOpMul:
2681 				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
2682 			default:
2683 				return -1;
2684 			}
2685 		}
2686 		else if(expression->getAsUnaryNode())
2687 		{
2688 			TIntermUnary *unary = expression->getAsUnaryNode();
2689 
2690 			switch(unary->getOp())
2691 			{
2692 			case EOpAbs:
2693 			case EOpNegative:
2694 				return cost(unary->getOperand(), budget - 1);
2695 			default:
2696 				return -1;
2697 			}
2698 		}
2699 		else if(expression->getAsSelectionNode())
2700 		{
2701 			TIntermSelection *selection = expression->getAsSelectionNode();
2702 
2703 			if(selection->usesTernaryOperator())
2704 			{
2705 				TIntermTyped *condition = selection->getCondition();
2706 				TIntermNode *trueBlock = selection->getTrueBlock();
2707 				TIntermNode *falseBlock = selection->getFalseBlock();
2708 				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
2709 
2710 				if(constantCondition)
2711 				{
2712 					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
2713 
2714 					if(trueCondition)
2715 					{
2716 						return cost(trueBlock, budget - 0);
2717 					}
2718 					else
2719 					{
2720 						return cost(falseBlock, budget - 0);
2721 					}
2722 				}
2723 				else
2724 				{
2725 					return cost(trueBlock, cost(falseBlock, budget - 2));
2726 				}
2727 			}
2728 		}
2729 
2730 		return -1;
2731 	}
2732 
findFunction(const TString & name)2733 	const Function *OutputASM::findFunction(const TString &name)
2734 	{
2735 		for(unsigned int f = 0; f < functionArray.size(); f++)
2736 		{
2737 			if(functionArray[f].name == name)
2738 			{
2739 				return &functionArray[f];
2740 			}
2741 		}
2742 
2743 		return 0;
2744 	}
2745 
temporaryRegister(TIntermTyped * temporary)2746 	int OutputASM::temporaryRegister(TIntermTyped *temporary)
2747 	{
2748 		return allocate(temporaries, temporary);
2749 	}
2750 
varyingRegister(TIntermTyped * varying)2751 	int OutputASM::varyingRegister(TIntermTyped *varying)
2752 	{
2753 		int var = lookup(varyings, varying);
2754 
2755 		if(var == -1)
2756 		{
2757 			var = allocate(varyings, varying);
2758 			int componentCount = varying->registerSize();
2759 			int registerCount = varying->totalRegisterCount();
2760 
2761 			if(pixelShader)
2762 			{
2763 				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
2764 				{
2765 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
2766 					return 0;
2767 				}
2768 
2769 				if(varying->getQualifier() == EvqPointCoord)
2770 				{
2771 					ASSERT(varying->isRegister());
2772 					if(componentCount >= 1) pixelShader->semantic[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);
2773 					if(componentCount >= 2) pixelShader->semantic[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);
2774 					if(componentCount >= 3) pixelShader->semantic[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);
2775 					if(componentCount >= 4) pixelShader->semantic[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);
2776 				}
2777 				else
2778 				{
2779 					for(int i = 0; i < varying->totalRegisterCount(); i++)
2780 					{
2781 						if(componentCount >= 1) pixelShader->semantic[var + i][0] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);
2782 						if(componentCount >= 2) pixelShader->semantic[var + i][1] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);
2783 						if(componentCount >= 3) pixelShader->semantic[var + i][2] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);
2784 						if(componentCount >= 4) pixelShader->semantic[var + i][3] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);
2785 					}
2786 				}
2787 			}
2788 			else if(vertexShader)
2789 			{
2790 				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
2791 				{
2792 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
2793 					return 0;
2794 				}
2795 
2796 				if(varying->getQualifier() == EvqPosition)
2797 				{
2798 					ASSERT(varying->isRegister());
2799 					vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);
2800 					vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);
2801 					vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);
2802 					vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);
2803 					vertexShader->positionRegister = var;
2804 				}
2805 				else if(varying->getQualifier() == EvqPointSize)
2806 				{
2807 					ASSERT(varying->isRegister());
2808 					vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);
2809 					vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);
2810 					vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);
2811 					vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);
2812 					vertexShader->pointSizeRegister = var;
2813 				}
2814 				else
2815 				{
2816 					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
2817 				}
2818 			}
2819 			else UNREACHABLE(0);
2820 
2821 			declareVarying(varying, var);
2822 		}
2823 
2824 		return var;
2825 	}
2826 
declareVarying(TIntermTyped * varying,int reg)2827 	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
2828 	{
2829 		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
2830 		{
2831 			const TType &type = varying->getType();
2832 			const char *name = varying->getAsSymbolNode()->getSymbol().c_str();
2833 			VaryingList &activeVaryings = shaderObject->varyings;
2834 
2835 			// Check if this varying has been declared before without having a register assigned
2836 			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
2837 			{
2838 				if(v->name == name)
2839 				{
2840 					if(reg >= 0)
2841 					{
2842 						ASSERT(v->reg < 0 || v->reg == reg);
2843 						v->reg = reg;
2844 					}
2845 
2846 					return;
2847 				}
2848 			}
2849 
2850 			activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0));
2851 		}
2852 	}
2853 
uniformRegister(TIntermTyped * uniform)2854 	int OutputASM::uniformRegister(TIntermTyped *uniform)
2855 	{
2856 		const TType &type = uniform->getType();
2857 		ASSERT(!IsSampler(type.getBasicType()));
2858 		TInterfaceBlock *block = type.getAsInterfaceBlock();
2859 		TIntermSymbol *symbol = uniform->getAsSymbolNode();
2860 		ASSERT(symbol || block);
2861 
2862 		if(symbol || block)
2863 		{
2864 			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
2865 			bool isBlockMember = (!block && parentBlock);
2866 			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
2867 
2868 			if(index == -1 || isBlockMember)
2869 			{
2870 				if(index == -1)
2871 				{
2872 					index = allocate(uniforms, uniform);
2873 				}
2874 
2875 				// Verify if the current uniform is a member of an already declared block
2876 				const TString &name = symbol ? symbol->getSymbol() : block->name();
2877 				int blockMemberIndex = blockMemberLookup(type, name, index);
2878 				if(blockMemberIndex == -1)
2879 				{
2880 					declareUniform(type, name, index);
2881 				}
2882 				else
2883 				{
2884 					index = blockMemberIndex;
2885 				}
2886 			}
2887 
2888 			return index;
2889 		}
2890 
2891 		return 0;
2892 	}
2893 
attributeRegister(TIntermTyped * attribute)2894 	int OutputASM::attributeRegister(TIntermTyped *attribute)
2895 	{
2896 		ASSERT(!attribute->isArray());
2897 
2898 		int index = lookup(attributes, attribute);
2899 
2900 		if(index == -1)
2901 		{
2902 			TIntermSymbol *symbol = attribute->getAsSymbolNode();
2903 			ASSERT(symbol);
2904 
2905 			if(symbol)
2906 			{
2907 				index = allocate(attributes, attribute);
2908 				const TType &type = attribute->getType();
2909 				int registerCount = attribute->totalRegisterCount();
2910 
2911 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
2912 				{
2913 					for(int i = 0; i < registerCount; i++)
2914 					{
2915 						vertexShader->input[index + i] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i);
2916 					}
2917 				}
2918 
2919 				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
2920 
2921 				const char *name = symbol->getSymbol().c_str();
2922 				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
2923 			}
2924 		}
2925 
2926 		return index;
2927 	}
2928 
fragmentOutputRegister(TIntermTyped * fragmentOutput)2929 	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
2930 	{
2931 		return allocate(fragmentOutputs, fragmentOutput);
2932 	}
2933 
samplerRegister(TIntermTyped * sampler)2934 	int OutputASM::samplerRegister(TIntermTyped *sampler)
2935 	{
2936 		const TType &type = sampler->getType();
2937 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
2938 
2939 		TIntermSymbol *symbol = sampler->getAsSymbolNode();
2940 		TIntermBinary *binary = sampler->getAsBinaryNode();
2941 
2942 		if(symbol && type.getQualifier() == EvqUniform)
2943 		{
2944 			return samplerRegister(symbol);
2945 		}
2946 		else if(binary)
2947 		{
2948 			TIntermTyped *left = binary->getLeft();
2949 			TIntermTyped *right = binary->getRight();
2950 			const TType &leftType = left->getType();
2951 			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
2952 			int offset = 0;
2953 
2954 			switch(binary->getOp())
2955 			{
2956 			case EOpIndexDirect:
2957 				ASSERT(left->isArray());
2958 				offset = index * leftType.elementRegisterCount();
2959 				break;
2960 			case EOpIndexDirectStruct:
2961 				ASSERT(leftType.isStruct());
2962 				{
2963 					const TFieldList &fields = leftType.getStruct()->fields();
2964 
2965 					for(int i = 0; i < index; i++)
2966 					{
2967 						offset += fields[i]->type()->totalRegisterCount();
2968 					}
2969 				}
2970 				break;
2971 			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
2972 				return -1;
2973 			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
2974 			default:
2975 				UNREACHABLE(binary->getOp());
2976 				return -1;
2977 			}
2978 
2979 			int base = samplerRegister(left);
2980 
2981 			if(base < 0)
2982 			{
2983 				return -1;
2984 			}
2985 
2986 			return base + offset;
2987 		}
2988 
2989 		UNREACHABLE(0);
2990 		return -1;   // Not a sampler register
2991 	}
2992 
samplerRegister(TIntermSymbol * sampler)2993 	int OutputASM::samplerRegister(TIntermSymbol *sampler)
2994 	{
2995 		const TType &type = sampler->getType();
2996 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
2997 
2998 		int index = lookup(samplers, sampler);
2999 
3000 		if(index == -1)
3001 		{
3002 			index = allocate(samplers, sampler);
3003 
3004 			if(sampler->getQualifier() == EvqUniform)
3005 			{
3006 				const char *name = sampler->getSymbol().c_str();
3007 				declareUniform(type, name, index);
3008 			}
3009 		}
3010 
3011 		return index;
3012 	}
3013 
isSamplerRegister(TIntermTyped * operand)3014 	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
3015 	{
3016 		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
3017 	}
3018 
lookup(VariableArray & list,TIntermTyped * variable)3019 	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
3020 	{
3021 		for(unsigned int i = 0; i < list.size(); i++)
3022 		{
3023 			if(list[i] == variable)
3024 			{
3025 				return i;   // Pointer match
3026 			}
3027 		}
3028 
3029 		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
3030 		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
3031 
3032 		if(varBlock)
3033 		{
3034 			for(unsigned int i = 0; i < list.size(); i++)
3035 			{
3036 				if(list[i])
3037 				{
3038 					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
3039 
3040 					if(listBlock)
3041 					{
3042 						if(listBlock->name() == varBlock->name())
3043 						{
3044 							ASSERT(listBlock->arraySize() == varBlock->arraySize());
3045 							ASSERT(listBlock->fields() == varBlock->fields());
3046 							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
3047 							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
3048 
3049 							return i;
3050 						}
3051 					}
3052 				}
3053 			}
3054 		}
3055 		else if(varSymbol)
3056 		{
3057 			for(unsigned int i = 0; i < list.size(); i++)
3058 			{
3059 				if(list[i])
3060 				{
3061 					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
3062 
3063 					if(listSymbol)
3064 					{
3065 						if(listSymbol->getId() == varSymbol->getId())
3066 						{
3067 							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
3068 							ASSERT(listSymbol->getType() == varSymbol->getType());
3069 							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
3070 
3071 							return i;
3072 						}
3073 					}
3074 				}
3075 			}
3076 		}
3077 
3078 		return -1;
3079 	}
3080 
lookup(VariableArray & list,TInterfaceBlock * block)3081 	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
3082 	{
3083 		for(unsigned int i = 0; i < list.size(); i++)
3084 		{
3085 			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
3086 			{
3087 				return i;   // Pointer match
3088 			}
3089 		}
3090 		return -1;
3091 	}
3092 
allocate(VariableArray & list,TIntermTyped * variable)3093 	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable)
3094 	{
3095 		int index = lookup(list, variable);
3096 
3097 		if(index == -1)
3098 		{
3099 			unsigned int registerCount = variable->blockRegisterCount();
3100 
3101 			for(unsigned int i = 0; i < list.size(); i++)
3102 			{
3103 				if(list[i] == 0)
3104 				{
3105 					unsigned int j = 1;
3106 					for( ; j < registerCount && (i + j) < list.size(); j++)
3107 					{
3108 						if(list[i + j] != 0)
3109 						{
3110 							break;
3111 						}
3112 					}
3113 
3114 					if(j == registerCount)   // Found free slots
3115 					{
3116 						for(unsigned int j = 0; j < registerCount; j++)
3117 						{
3118 							list[i + j] = variable;
3119 						}
3120 
3121 						return i;
3122 					}
3123 				}
3124 			}
3125 
3126 			index = list.size();
3127 
3128 			for(unsigned int i = 0; i < registerCount; i++)
3129 			{
3130 				list.push_back(variable);
3131 			}
3132 		}
3133 
3134 		return index;
3135 	}
3136 
free(VariableArray & list,TIntermTyped * variable)3137 	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
3138 	{
3139 		int index = lookup(list, variable);
3140 
3141 		if(index >= 0)
3142 		{
3143 			list[index] = 0;
3144 		}
3145 	}
3146 
blockMemberLookup(const TType & type,const TString & name,int registerIndex)3147 	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
3148 	{
3149 		const TInterfaceBlock *block = type.getInterfaceBlock();
3150 
3151 		if(block)
3152 		{
3153 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3154 			const TFieldList& fields = block->fields();
3155 			const TString &blockName = block->name();
3156 			int fieldRegisterIndex = registerIndex;
3157 
3158 			if(!type.isInterfaceBlock())
3159 			{
3160 				// This is a uniform that's part of a block, let's see if the block is already defined
3161 				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
3162 				{
3163 					if(activeUniformBlocks[i].name == blockName.c_str())
3164 					{
3165 						// The block is already defined, find the register for the current uniform and return it
3166 						for(size_t j = 0; j < fields.size(); j++)
3167 						{
3168 							const TString &fieldName = fields[j]->name();
3169 							if(fieldName == name)
3170 							{
3171 								return fieldRegisterIndex;
3172 							}
3173 
3174 							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
3175 						}
3176 
3177 						ASSERT(false);
3178 						return fieldRegisterIndex;
3179 					}
3180 				}
3181 			}
3182 		}
3183 
3184 		return -1;
3185 	}
3186 
declareUniform(const TType & type,const TString & name,int registerIndex,int blockId,BlockLayoutEncoder * encoder)3187 	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int blockId, BlockLayoutEncoder* encoder)
3188 	{
3189 		const TStructure *structure = type.getStruct();
3190 		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;
3191 
3192 		if(!structure && !block)
3193 		{
3194 			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
3195 			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
3196 			if(blockId >= 0)
3197 			{
3198 				blockDefinitions[blockId][registerIndex] = TypedMemberInfo(blockInfo, type);
3199 				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
3200 			}
3201 			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
3202 			activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(),
3203 			                                 fieldRegisterIndex, blockId, blockInfo));
3204 			if(IsSampler(type.getBasicType()))
3205 			{
3206 				for(int i = 0; i < type.totalRegisterCount(); i++)
3207 				{
3208 					shader->declareSampler(fieldRegisterIndex + i);
3209 				}
3210 			}
3211 		}
3212 		else if(block)
3213 		{
3214 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3215 			const TFieldList& fields = block->fields();
3216 			const TString &blockName = block->name();
3217 			int fieldRegisterIndex = registerIndex;
3218 			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);
3219 
3220 			blockId = activeUniformBlocks.size();
3221 			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
3222 			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
3223 			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
3224 			blockDefinitions.push_back(BlockDefinitionIndexMap());
3225 
3226 			Std140BlockEncoder currentBlockEncoder(isRowMajor);
3227 			currentBlockEncoder.enterAggregateType();
3228 			for(size_t i = 0; i < fields.size(); i++)
3229 			{
3230 				const TType &fieldType = *(fields[i]->type());
3231 				const TString &fieldName = fields[i]->name();
3232 				if(isUniformBlockMember && (fieldName == name))
3233 				{
3234 					registerIndex = fieldRegisterIndex;
3235 				}
3236 
3237 				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;
3238 
3239 				declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, &currentBlockEncoder);
3240 				fieldRegisterIndex += fieldType.totalRegisterCount();
3241 			}
3242 			currentBlockEncoder.exitAggregateType();
3243 			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
3244 		}
3245 		else
3246 		{
3247 			int fieldRegisterIndex = registerIndex;
3248 
3249 			const TFieldList& fields = structure->fields();
3250 			if(type.isArray() && (structure || type.isInterfaceBlock()))
3251 			{
3252 				for(int i = 0; i < type.getArraySize(); i++)
3253 				{
3254 					if(encoder)
3255 					{
3256 						encoder->enterAggregateType();
3257 					}
3258 					for(size_t j = 0; j < fields.size(); j++)
3259 					{
3260 						const TType &fieldType = *(fields[j]->type());
3261 						const TString &fieldName = fields[j]->name();
3262 						const TString uniformName = name + "[" + str(i) + "]." + fieldName;
3263 
3264 						declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);
3265 						fieldRegisterIndex += fieldType.totalRegisterCount();
3266 					}
3267 					if(encoder)
3268 					{
3269 						encoder->exitAggregateType();
3270 					}
3271 				}
3272 			}
3273 			else
3274 			{
3275 				if(encoder)
3276 				{
3277 					encoder->enterAggregateType();
3278 				}
3279 				for(size_t i = 0; i < fields.size(); i++)
3280 				{
3281 					const TType &fieldType = *(fields[i]->type());
3282 					const TString &fieldName = fields[i]->name();
3283 					const TString uniformName = name + "." + fieldName;
3284 
3285 					declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);
3286 					fieldRegisterIndex += fieldType.totalRegisterCount();
3287 				}
3288 				if(encoder)
3289 				{
3290 					encoder->exitAggregateType();
3291 				}
3292 			}
3293 		}
3294 	}
3295 
glVariableType(const TType & type)3296 	GLenum OutputASM::glVariableType(const TType &type)
3297 	{
3298 		switch(type.getBasicType())
3299 		{
3300 		case EbtFloat:
3301 			if(type.isScalar())
3302 			{
3303 				return GL_FLOAT;
3304 			}
3305 			else if(type.isVector())
3306 			{
3307 				switch(type.getNominalSize())
3308 				{
3309 				case 2: return GL_FLOAT_VEC2;
3310 				case 3: return GL_FLOAT_VEC3;
3311 				case 4: return GL_FLOAT_VEC4;
3312 				default: UNREACHABLE(type.getNominalSize());
3313 				}
3314 			}
3315 			else if(type.isMatrix())
3316 			{
3317 				switch(type.getNominalSize())
3318 				{
3319 				case 2:
3320 					switch(type.getSecondarySize())
3321 					{
3322 					case 2: return GL_FLOAT_MAT2;
3323 					case 3: return GL_FLOAT_MAT2x3;
3324 					case 4: return GL_FLOAT_MAT2x4;
3325 					default: UNREACHABLE(type.getSecondarySize());
3326 					}
3327 				case 3:
3328 					switch(type.getSecondarySize())
3329 					{
3330 					case 2: return GL_FLOAT_MAT3x2;
3331 					case 3: return GL_FLOAT_MAT3;
3332 					case 4: return GL_FLOAT_MAT3x4;
3333 					default: UNREACHABLE(type.getSecondarySize());
3334 					}
3335 				case 4:
3336 					switch(type.getSecondarySize())
3337 					{
3338 					case 2: return GL_FLOAT_MAT4x2;
3339 					case 3: return GL_FLOAT_MAT4x3;
3340 					case 4: return GL_FLOAT_MAT4;
3341 					default: UNREACHABLE(type.getSecondarySize());
3342 					}
3343 				default: UNREACHABLE(type.getNominalSize());
3344 				}
3345 			}
3346 			else UNREACHABLE(0);
3347 			break;
3348 		case EbtInt:
3349 			if(type.isScalar())
3350 			{
3351 				return GL_INT;
3352 			}
3353 			else if(type.isVector())
3354 			{
3355 				switch(type.getNominalSize())
3356 				{
3357 				case 2: return GL_INT_VEC2;
3358 				case 3: return GL_INT_VEC3;
3359 				case 4: return GL_INT_VEC4;
3360 				default: UNREACHABLE(type.getNominalSize());
3361 				}
3362 			}
3363 			else UNREACHABLE(0);
3364 			break;
3365 		case EbtUInt:
3366 			if(type.isScalar())
3367 			{
3368 				return GL_UNSIGNED_INT;
3369 			}
3370 			else if(type.isVector())
3371 			{
3372 				switch(type.getNominalSize())
3373 				{
3374 				case 2: return GL_UNSIGNED_INT_VEC2;
3375 				case 3: return GL_UNSIGNED_INT_VEC3;
3376 				case 4: return GL_UNSIGNED_INT_VEC4;
3377 				default: UNREACHABLE(type.getNominalSize());
3378 				}
3379 			}
3380 			else UNREACHABLE(0);
3381 			break;
3382 		case EbtBool:
3383 			if(type.isScalar())
3384 			{
3385 				return GL_BOOL;
3386 			}
3387 			else if(type.isVector())
3388 			{
3389 				switch(type.getNominalSize())
3390 				{
3391 				case 2: return GL_BOOL_VEC2;
3392 				case 3: return GL_BOOL_VEC3;
3393 				case 4: return GL_BOOL_VEC4;
3394 				default: UNREACHABLE(type.getNominalSize());
3395 				}
3396 			}
3397 			else UNREACHABLE(0);
3398 			break;
3399 		case EbtSampler2D:
3400 			return GL_SAMPLER_2D;
3401 		case EbtISampler2D:
3402 			return GL_INT_SAMPLER_2D;
3403 		case EbtUSampler2D:
3404 			return GL_UNSIGNED_INT_SAMPLER_2D;
3405 		case EbtSamplerCube:
3406 			return GL_SAMPLER_CUBE;
3407 		case EbtISamplerCube:
3408 			return GL_INT_SAMPLER_CUBE;
3409 		case EbtUSamplerCube:
3410 			return GL_UNSIGNED_INT_SAMPLER_CUBE;
3411 		case EbtSamplerExternalOES:
3412 			return GL_SAMPLER_EXTERNAL_OES;
3413 		case EbtSampler3D:
3414 			return GL_SAMPLER_3D_OES;
3415 		case EbtISampler3D:
3416 			return GL_INT_SAMPLER_3D;
3417 		case EbtUSampler3D:
3418 			return GL_UNSIGNED_INT_SAMPLER_3D;
3419 		case EbtSampler2DArray:
3420 			return GL_SAMPLER_2D_ARRAY;
3421 		case EbtISampler2DArray:
3422 			return GL_INT_SAMPLER_2D_ARRAY;
3423 		case EbtUSampler2DArray:
3424 			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
3425 		case EbtSampler2DShadow:
3426 			return GL_SAMPLER_2D_SHADOW;
3427 		case EbtSamplerCubeShadow:
3428 			return GL_SAMPLER_CUBE_SHADOW;
3429 		case EbtSampler2DArrayShadow:
3430 			return GL_SAMPLER_2D_ARRAY_SHADOW;
3431 		default:
3432 			UNREACHABLE(type.getBasicType());
3433 			break;
3434 		}
3435 
3436 		return GL_NONE;
3437 	}
3438 
glVariablePrecision(const TType & type)3439 	GLenum OutputASM::glVariablePrecision(const TType &type)
3440 	{
3441 		if(type.getBasicType() == EbtFloat)
3442 		{
3443 			switch(type.getPrecision())
3444 			{
3445 			case EbpHigh:   return GL_HIGH_FLOAT;
3446 			case EbpMedium: return GL_MEDIUM_FLOAT;
3447 			case EbpLow:    return GL_LOW_FLOAT;
3448 			case EbpUndefined:
3449 				// Should be defined as the default precision by the parser
3450 			default: UNREACHABLE(type.getPrecision());
3451 			}
3452 		}
3453 		else if(type.getBasicType() == EbtInt)
3454 		{
3455 			switch(type.getPrecision())
3456 			{
3457 			case EbpHigh:   return GL_HIGH_INT;
3458 			case EbpMedium: return GL_MEDIUM_INT;
3459 			case EbpLow:    return GL_LOW_INT;
3460 			case EbpUndefined:
3461 				// Should be defined as the default precision by the parser
3462 			default: UNREACHABLE(type.getPrecision());
3463 			}
3464 		}
3465 
3466 		// Other types (boolean, sampler) don't have a precision
3467 		return GL_NONE;
3468 	}
3469 
dim(TIntermNode * v)3470 	int OutputASM::dim(TIntermNode *v)
3471 	{
3472 		TIntermTyped *vector = v->getAsTyped();
3473 		ASSERT(vector && vector->isRegister());
3474 		return vector->getNominalSize();
3475 	}
3476 
dim2(TIntermNode * m)3477 	int OutputASM::dim2(TIntermNode *m)
3478 	{
3479 		TIntermTyped *matrix = m->getAsTyped();
3480 		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
3481 		return matrix->getSecondarySize();
3482 	}
3483 
3484 	// Returns ~0u if no loop count could be determined
loopCount(TIntermLoop * node)3485 	unsigned int OutputASM::loopCount(TIntermLoop *node)
3486 	{
3487 		// Parse loops of the form:
3488 		// for(int index = initial; index [comparator] limit; index += increment)
3489 		TIntermSymbol *index = 0;
3490 		TOperator comparator = EOpNull;
3491 		int initial = 0;
3492 		int limit = 0;
3493 		int increment = 0;
3494 
3495 		// Parse index name and intial value
3496 		if(node->getInit())
3497 		{
3498 			TIntermAggregate *init = node->getInit()->getAsAggregate();
3499 
3500 			if(init)
3501 			{
3502 				TIntermSequence &sequence = init->getSequence();
3503 				TIntermTyped *variable = sequence[0]->getAsTyped();
3504 
3505 				if(variable && variable->getQualifier() == EvqTemporary)
3506 				{
3507 					TIntermBinary *assign = variable->getAsBinaryNode();
3508 
3509 					if(assign->getOp() == EOpInitialize)
3510 					{
3511 						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
3512 						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();
3513 
3514 						if(symbol && constant)
3515 						{
3516 							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3517 							{
3518 								index = symbol;
3519 								initial = constant->getUnionArrayPointer()[0].getIConst();
3520 							}
3521 						}
3522 					}
3523 				}
3524 			}
3525 		}
3526 
3527 		// Parse comparator and limit value
3528 		if(index && node->getCondition())
3529 		{
3530 			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
3531 			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;
3532 
3533 			if(left && (left->getId() == index->getId()))
3534 			{
3535 				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();
3536 
3537 				if(constant)
3538 				{
3539 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3540 					{
3541 						comparator = test->getOp();
3542 						limit = constant->getUnionArrayPointer()[0].getIConst();
3543 					}
3544 				}
3545 			}
3546 		}
3547 
3548 		// Parse increment
3549 		if(index && comparator != EOpNull && node->getExpression())
3550 		{
3551 			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
3552 			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();
3553 
3554 			if(binaryTerminal)
3555 			{
3556 				TOperator op = binaryTerminal->getOp();
3557 				TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();
3558 
3559 				if(constant)
3560 				{
3561 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3562 					{
3563 						int value = constant->getUnionArrayPointer()[0].getIConst();
3564 
3565 						switch(op)
3566 						{
3567 						case EOpAddAssign: increment = value;  break;
3568 						case EOpSubAssign: increment = -value; break;
3569 						default: UNIMPLEMENTED();
3570 						}
3571 					}
3572 				}
3573 			}
3574 			else if(unaryTerminal)
3575 			{
3576 				TOperator op = unaryTerminal->getOp();
3577 
3578 				switch(op)
3579 				{
3580 				case EOpPostIncrement: increment = 1;  break;
3581 				case EOpPostDecrement: increment = -1; break;
3582 				case EOpPreIncrement:  increment = 1;  break;
3583 				case EOpPreDecrement:  increment = -1; break;
3584 				default: UNIMPLEMENTED();
3585 				}
3586 			}
3587 		}
3588 
3589 		if(index && comparator != EOpNull && increment != 0)
3590 		{
3591 			if(comparator == EOpLessThanEqual)
3592 			{
3593 				comparator = EOpLessThan;
3594 				limit += 1;
3595 			}
3596 
3597 			if(comparator == EOpLessThan)
3598 			{
3599 				int iterations = (limit - initial) / increment;
3600 
3601 				if(iterations <= 0)
3602 				{
3603 					iterations = 0;
3604 				}
3605 
3606 				return iterations;
3607 			}
3608 			else UNIMPLEMENTED();   // Falls through
3609 		}
3610 
3611 		return ~0u;
3612 	}
3613 
traverse(TIntermNode * node)3614 	bool LoopUnrollable::traverse(TIntermNode *node)
3615 	{
3616 		loopDepth = 0;
3617 		loopUnrollable = true;
3618 
3619 		node->traverse(this);
3620 
3621 		return loopUnrollable;
3622 	}
3623 
visitLoop(Visit visit,TIntermLoop * loop)3624 	bool LoopUnrollable::visitLoop(Visit visit, TIntermLoop *loop)
3625 	{
3626 		if(visit == PreVisit)
3627 		{
3628 			loopDepth++;
3629 		}
3630 		else if(visit == PostVisit)
3631 		{
3632 			loopDepth++;
3633 		}
3634 
3635 		return true;
3636 	}
3637 
visitBranch(Visit visit,TIntermBranch * node)3638 	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
3639 	{
3640 		if(!loopUnrollable)
3641 		{
3642 			return false;
3643 		}
3644 
3645 		if(!loopDepth)
3646 		{
3647 			return true;
3648 		}
3649 
3650 		switch(node->getFlowOp())
3651 		{
3652 		case EOpKill:
3653 		case EOpReturn:
3654 			break;
3655 		case EOpBreak:
3656 		case EOpContinue:
3657 			loopUnrollable = false;
3658 			break;
3659 		default: UNREACHABLE(node->getFlowOp());
3660 		}
3661 
3662 		return loopUnrollable;
3663 	}
3664 
visitAggregate(Visit visit,TIntermAggregate * node)3665 	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
3666 	{
3667 		return loopUnrollable;
3668 	}
3669 }
3670