1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "OutputASM.h" 16 #include "Common/Math.hpp" 17 18 #include "common/debug.h" 19 #include "InfoSink.h" 20 21 #include "libGLESv2/Shader.h" 22 23 #include <GLES2/gl2.h> 24 #include <GLES2/gl2ext.h> 25 #include <GLES3/gl3.h> 26 27 namespace glsl 28 { 29 // Integer to TString conversion str(int i)30 TString str(int i) 31 { 32 char buffer[20]; 33 sprintf(buffer, "%d", i); 34 return buffer; 35 } 36 37 class Temporary : public TIntermSymbol 38 { 39 public: Temporary(OutputASM * assembler)40 Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler) 41 { 42 } 43 ~Temporary()44 ~Temporary() 45 { 46 assembler->freeTemporary(this); 47 } 48 49 private: 50 OutputASM *const assembler; 51 }; 52 53 class Constant : public TIntermConstantUnion 54 { 55 public: Constant(float x,float y,float z,float w)56 Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false)) 57 { 58 constants[0].setFConst(x); 59 constants[1].setFConst(y); 60 constants[2].setFConst(z); 61 constants[3].setFConst(w); 62 } 63 Constant(bool b)64 Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false)) 65 { 66 constants[0].setBConst(b); 67 } 68 Constant(int i)69 Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false)) 70 { 71 constants[0].setIConst(i); 72 } 73 ~Constant()74 ~Constant() 75 { 76 } 77 78 private: 79 ConstantUnion constants[4]; 80 }; 81 Uniform(GLenum type,GLenum precision,const std::string & name,int arraySize,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)82 Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) : 83 type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), blockId(blockId), blockInfo(blockMemberInfo) 84 { 85 } 86 UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)87 UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize, 88 TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) : 89 name(name), dataSize(dataSize), arraySize(arraySize), layout(layout), 90 isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId) 91 { 92 } 93 BlockLayoutEncoder(bool rowMajor)94 BlockLayoutEncoder::BlockLayoutEncoder(bool rowMajor) 95 : mCurrentOffset(0), isRowMajor(rowMajor) 96 { 97 } 98 encodeType(const TType & type)99 BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type) 100 { 101 int arrayStride; 102 int matrixStride; 103 104 getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride); 105 106 const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent), 107 static_cast<int>(arrayStride * BytesPerComponent), 108 static_cast<int>(matrixStride * BytesPerComponent), 109 (matrixStride > 0) && isRowMajor); 110 111 advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride); 112 113 return memberInfo; 114 } 115 116 // static getBlockRegister(const BlockMemberInfo & info)117 size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info) 118 { 119 return (info.offset / BytesPerComponent) / ComponentsPerRegister; 120 } 121 122 // static getBlockRegisterElement(const BlockMemberInfo & info)123 size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info) 124 { 125 return (info.offset / BytesPerComponent) % ComponentsPerRegister; 126 } 127 nextRegister()128 void BlockLayoutEncoder::nextRegister() 129 { 130 mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister); 131 } 132 Std140BlockEncoder(bool rowMajor)133 Std140BlockEncoder::Std140BlockEncoder(bool rowMajor) : BlockLayoutEncoder(rowMajor) 134 { 135 } 136 enterAggregateType()137 void Std140BlockEncoder::enterAggregateType() 138 { 139 nextRegister(); 140 } 141 exitAggregateType()142 void Std140BlockEncoder::exitAggregateType() 143 { 144 nextRegister(); 145 } 146 getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)147 void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut) 148 { 149 size_t baseAlignment = 0; 150 int matrixStride = 0; 151 int arrayStride = 0; 152 153 if(type.isMatrix()) 154 { 155 baseAlignment = ComponentsPerRegister; 156 matrixStride = ComponentsPerRegister; 157 158 if(arraySize > 0) 159 { 160 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize(); 161 arrayStride = ComponentsPerRegister * numRegisters; 162 } 163 } 164 else if(arraySize > 0) 165 { 166 baseAlignment = ComponentsPerRegister; 167 arrayStride = ComponentsPerRegister; 168 } 169 else 170 { 171 const size_t numComponents = type.getElementSize(); 172 baseAlignment = (numComponents == 3 ? 4u : numComponents); 173 } 174 175 mCurrentOffset = sw::align(mCurrentOffset, baseAlignment); 176 177 *matrixStrideOut = matrixStride; 178 *arrayStrideOut = arrayStride; 179 } 180 advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)181 void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride) 182 { 183 if(arraySize > 0) 184 { 185 mCurrentOffset += arrayStride * arraySize; 186 } 187 else if(type.isMatrix()) 188 { 189 ASSERT(matrixStride == ComponentsPerRegister); 190 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize(); 191 mCurrentOffset += ComponentsPerRegister * numRegisters; 192 } 193 else 194 { 195 mCurrentOffset += type.getElementSize(); 196 } 197 } 198 Attribute()199 Attribute::Attribute() 200 { 201 type = GL_NONE; 202 arraySize = 0; 203 registerIndex = 0; 204 } 205 Attribute(GLenum type,const std::string & name,int arraySize,int location,int registerIndex)206 Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex) 207 { 208 this->type = type; 209 this->name = name; 210 this->arraySize = arraySize; 211 this->location = location; 212 this->registerIndex = registerIndex; 213 } 214 getPixelShader() const215 sw::PixelShader *Shader::getPixelShader() const 216 { 217 return 0; 218 } 219 getVertexShader() const220 sw::VertexShader *Shader::getVertexShader() const 221 { 222 return 0; 223 } 224 TextureFunction(const TString & nodeName)225 OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false) 226 { 227 TString name = TFunction::unmangleName(nodeName); 228 229 if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D") 230 { 231 method = IMPLICIT; 232 } 233 else if(name == "texture2DProj" || name == "textureProj") 234 { 235 method = IMPLICIT; 236 proj = true; 237 } 238 else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod") 239 { 240 method = LOD; 241 } 242 else if(name == "texture2DProjLod" || name == "textureProjLod") 243 { 244 method = LOD; 245 proj = true; 246 } 247 else if(name == "textureSize") 248 { 249 method = SIZE; 250 } 251 else if(name == "textureOffset") 252 { 253 method = IMPLICIT; 254 offset = true; 255 } 256 else if(name == "textureProjOffset") 257 { 258 method = IMPLICIT; 259 offset = true; 260 proj = true; 261 } 262 else if(name == "textureLodOffset") 263 { 264 method = LOD; 265 offset = true; 266 } 267 else if(name == "textureProjLodOffset") 268 { 269 method = LOD; 270 proj = true; 271 offset = true; 272 } 273 else if(name == "texelFetch") 274 { 275 method = FETCH; 276 } 277 else if(name == "texelFetchOffset") 278 { 279 method = FETCH; 280 offset = true; 281 } 282 else if(name == "textureGrad") 283 { 284 method = GRAD; 285 } 286 else if(name == "textureGradOffset") 287 { 288 method = GRAD; 289 offset = true; 290 } 291 else if(name == "textureProjGrad") 292 { 293 method = GRAD; 294 proj = true; 295 } 296 else if(name == "textureProjGradOffset") 297 { 298 method = GRAD; 299 proj = true; 300 offset = true; 301 } 302 else UNREACHABLE(0); 303 } 304 OutputASM(TParseContext & context,Shader * shaderObject)305 OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context) 306 { 307 shader = 0; 308 pixelShader = 0; 309 vertexShader = 0; 310 311 if(shaderObject) 312 { 313 shader = shaderObject->getShader(); 314 pixelShader = shaderObject->getPixelShader(); 315 vertexShader = shaderObject->getVertexShader(); 316 } 317 318 functionArray.push_back(Function(0, "main(", 0, 0)); 319 currentFunction = 0; 320 outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData 321 } 322 ~OutputASM()323 OutputASM::~OutputASM() 324 { 325 } 326 output()327 void OutputASM::output() 328 { 329 if(shader) 330 { 331 emitShader(GLOBAL); 332 333 if(functionArray.size() > 1) // Only call main() when there are other functions 334 { 335 Instruction *callMain = emit(sw::Shader::OPCODE_CALL); 336 callMain->dst.type = sw::Shader::PARAMETER_LABEL; 337 callMain->dst.index = 0; // main() 338 339 emit(sw::Shader::OPCODE_RET); 340 } 341 342 emitShader(FUNCTION); 343 } 344 } 345 emitShader(Scope scope)346 void OutputASM::emitShader(Scope scope) 347 { 348 emitScope = scope; 349 currentScope = GLOBAL; 350 mContext.getTreeRoot()->traverse(this); 351 } 352 freeTemporary(Temporary * temporary)353 void OutputASM::freeTemporary(Temporary *temporary) 354 { 355 free(temporaries, temporary); 356 } 357 getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const358 sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const 359 { 360 TBasicType baseType = in->getType().getBasicType(); 361 362 switch(op) 363 { 364 case sw::Shader::OPCODE_NEG: 365 switch(baseType) 366 { 367 case EbtInt: 368 case EbtUInt: 369 return sw::Shader::OPCODE_INEG; 370 case EbtFloat: 371 default: 372 return op; 373 } 374 case sw::Shader::OPCODE_ABS: 375 switch(baseType) 376 { 377 case EbtInt: 378 return sw::Shader::OPCODE_IABS; 379 case EbtFloat: 380 default: 381 return op; 382 } 383 case sw::Shader::OPCODE_SGN: 384 switch(baseType) 385 { 386 case EbtInt: 387 return sw::Shader::OPCODE_ISGN; 388 case EbtFloat: 389 default: 390 return op; 391 } 392 case sw::Shader::OPCODE_ADD: 393 switch(baseType) 394 { 395 case EbtInt: 396 case EbtUInt: 397 return sw::Shader::OPCODE_IADD; 398 case EbtFloat: 399 default: 400 return op; 401 } 402 case sw::Shader::OPCODE_SUB: 403 switch(baseType) 404 { 405 case EbtInt: 406 case EbtUInt: 407 return sw::Shader::OPCODE_ISUB; 408 case EbtFloat: 409 default: 410 return op; 411 } 412 case sw::Shader::OPCODE_MUL: 413 switch(baseType) 414 { 415 case EbtInt: 416 case EbtUInt: 417 return sw::Shader::OPCODE_IMUL; 418 case EbtFloat: 419 default: 420 return op; 421 } 422 case sw::Shader::OPCODE_DIV: 423 switch(baseType) 424 { 425 case EbtInt: 426 return sw::Shader::OPCODE_IDIV; 427 case EbtUInt: 428 return sw::Shader::OPCODE_UDIV; 429 case EbtFloat: 430 default: 431 return op; 432 } 433 case sw::Shader::OPCODE_IMOD: 434 return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op; 435 case sw::Shader::OPCODE_ISHR: 436 return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op; 437 case sw::Shader::OPCODE_MIN: 438 switch(baseType) 439 { 440 case EbtInt: 441 return sw::Shader::OPCODE_IMIN; 442 case EbtUInt: 443 return sw::Shader::OPCODE_UMIN; 444 case EbtFloat: 445 default: 446 return op; 447 } 448 case sw::Shader::OPCODE_MAX: 449 switch(baseType) 450 { 451 case EbtInt: 452 return sw::Shader::OPCODE_IMAX; 453 case EbtUInt: 454 return sw::Shader::OPCODE_UMAX; 455 case EbtFloat: 456 default: 457 return op; 458 } 459 default: 460 return op; 461 } 462 } 463 visitSymbol(TIntermSymbol * symbol)464 void OutputASM::visitSymbol(TIntermSymbol *symbol) 465 { 466 // Vertex varyings don't have to be actively used to successfully link 467 // against pixel shaders that use them. So make sure they're declared. 468 if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut) 469 { 470 if(symbol->getBasicType() != EbtInvariant) // Typeless declarations are not new varyings 471 { 472 declareVarying(symbol, -1); 473 } 474 } 475 476 TInterfaceBlock* block = symbol->getType().getInterfaceBlock(); 477 // OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables: 478 // "All members of a named uniform block declared with a shared or std140 layout qualifier 479 // are considered active, even if they are not referenced in any shader in the program. 480 // The uniform block itself is also considered active, even if no member of the block is referenced." 481 if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140))) 482 { 483 uniformRegister(symbol); 484 } 485 } 486 visitBinary(Visit visit,TIntermBinary * node)487 bool OutputASM::visitBinary(Visit visit, TIntermBinary *node) 488 { 489 if(currentScope != emitScope) 490 { 491 return false; 492 } 493 494 TIntermTyped *result = node; 495 TIntermTyped *left = node->getLeft(); 496 TIntermTyped *right = node->getRight(); 497 const TType &leftType = left->getType(); 498 const TType &rightType = right->getType(); 499 const TType &resultType = node->getType(); 500 501 if(isSamplerRegister(result)) 502 { 503 return false; // Don't traverse, the register index is determined statically 504 } 505 506 switch(node->getOp()) 507 { 508 case EOpAssign: 509 if(visit == PostVisit) 510 { 511 assignLvalue(left, right); 512 copy(result, right); 513 } 514 break; 515 case EOpInitialize: 516 if(visit == PostVisit) 517 { 518 copy(left, right); 519 } 520 break; 521 case EOpMatrixTimesScalarAssign: 522 if(visit == PostVisit) 523 { 524 for(int i = 0; i < leftType.getNominalSize(); i++) 525 { 526 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right); 527 } 528 529 assignLvalue(left, result); 530 } 531 break; 532 case EOpVectorTimesMatrixAssign: 533 if(visit == PostVisit) 534 { 535 int size = leftType.getNominalSize(); 536 537 for(int i = 0; i < size; i++) 538 { 539 Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i); 540 dot->dst.mask = 1 << i; 541 } 542 543 assignLvalue(left, result); 544 } 545 break; 546 case EOpMatrixTimesMatrixAssign: 547 if(visit == PostVisit) 548 { 549 int dim = leftType.getNominalSize(); 550 551 for(int i = 0; i < dim; i++) 552 { 553 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 554 mul->src[1].swizzle = 0x00; 555 556 for(int j = 1; j < dim; j++) 557 { 558 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 559 mad->src[1].swizzle = j * 0x55; 560 } 561 } 562 563 assignLvalue(left, result); 564 } 565 break; 566 case EOpIndexDirect: 567 if(visit == PostVisit) 568 { 569 int index = right->getAsConstantUnion()->getIConst(0); 570 571 if(result->isMatrix() || result->isStruct() || result->isInterfaceBlock()) 572 { 573 ASSERT(left->isArray()); 574 copy(result, left, index * left->elementRegisterCount()); 575 } 576 else if(result->isRegister()) 577 { 578 int srcIndex = 0; 579 if(left->isRegister()) 580 { 581 srcIndex = 0; 582 } 583 else if(left->isArray()) 584 { 585 srcIndex = index * left->elementRegisterCount(); 586 } 587 else if(left->isMatrix()) 588 { 589 ASSERT(index < left->getNominalSize()); // FIXME: Report semantic error 590 srcIndex = index; 591 } 592 else UNREACHABLE(0); 593 594 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, 0, left, srcIndex); 595 596 if(left->isRegister()) 597 { 598 mov->src[0].swizzle = index; 599 } 600 } 601 else UNREACHABLE(0); 602 } 603 break; 604 case EOpIndexIndirect: 605 if(visit == PostVisit) 606 { 607 if(left->isArray() || left->isMatrix()) 608 { 609 for(int index = 0; index < result->totalRegisterCount(); index++) 610 { 611 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index, left, index); 612 mov->dst.mask = writeMask(result, index); 613 614 if(left->totalRegisterCount() > 1) 615 { 616 sw::Shader::SourceParameter relativeRegister; 617 argument(relativeRegister, right); 618 619 mov->src[0].rel.type = relativeRegister.type; 620 mov->src[0].rel.index = relativeRegister.index; 621 mov->src[0].rel.scale = result->totalRegisterCount(); 622 mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform); 623 } 624 } 625 } 626 else if(left->isRegister()) 627 { 628 emit(sw::Shader::OPCODE_EXTRACT, result, left, right); 629 } 630 else UNREACHABLE(0); 631 } 632 break; 633 case EOpIndexDirectStruct: 634 case EOpIndexDirectInterfaceBlock: 635 if(visit == PostVisit) 636 { 637 ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock())); 638 639 const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ? 640 leftType.getStruct()->fields() : 641 leftType.getInterfaceBlock()->fields(); 642 int index = right->getAsConstantUnion()->getIConst(0); 643 int fieldOffset = 0; 644 645 for(int i = 0; i < index; i++) 646 { 647 fieldOffset += fields[i]->type()->totalRegisterCount(); 648 } 649 650 copy(result, left, fieldOffset); 651 } 652 break; 653 case EOpVectorSwizzle: 654 if(visit == PostVisit) 655 { 656 int swizzle = 0; 657 TIntermAggregate *components = right->getAsAggregate(); 658 659 if(components) 660 { 661 TIntermSequence &sequence = components->getSequence(); 662 int component = 0; 663 664 for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++) 665 { 666 TIntermConstantUnion *element = (*sit)->getAsConstantUnion(); 667 668 if(element) 669 { 670 int i = element->getUnionArrayPointer()[0].getIConst(); 671 swizzle |= i << (component * 2); 672 component++; 673 } 674 else UNREACHABLE(0); 675 } 676 } 677 else UNREACHABLE(0); 678 679 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left); 680 mov->src[0].swizzle = swizzle; 681 } 682 break; 683 case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break; 684 case EOpAdd: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right); break; 685 case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break; 686 case EOpSub: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right); break; 687 case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break; 688 case EOpMul: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right); break; 689 case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break; 690 case EOpDiv: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right); break; 691 case EOpIModAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break; 692 case EOpIMod: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right); break; 693 case EOpBitShiftLeftAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break; 694 case EOpBitShiftLeft: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right); break; 695 case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break; 696 case EOpBitShiftRight: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right); break; 697 case EOpBitwiseAndAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break; 698 case EOpBitwiseAnd: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right); break; 699 case EOpBitwiseXorAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break; 700 case EOpBitwiseXor: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right); break; 701 case EOpBitwiseOrAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right); break; 702 case EOpBitwiseOr: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right); break; 703 case EOpEqual: 704 if(visit == PostVisit) 705 { 706 emitBinary(sw::Shader::OPCODE_EQ, result, left, right); 707 708 for(int index = 1; index < left->totalRegisterCount(); index++) 709 { 710 Temporary equal(this); 711 emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index); 712 emit(sw::Shader::OPCODE_AND, result, result, &equal); 713 } 714 } 715 break; 716 case EOpNotEqual: 717 if(visit == PostVisit) 718 { 719 emitBinary(sw::Shader::OPCODE_NE, result, left, right); 720 721 for(int index = 1; index < left->totalRegisterCount(); index++) 722 { 723 Temporary notEqual(this); 724 emit(sw::Shader::OPCODE_NE, ¬Equal, 0, left, index, right, index); 725 emit(sw::Shader::OPCODE_OR, result, result, ¬Equal); 726 } 727 } 728 break; 729 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break; 730 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break; 731 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break; 732 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break; 733 case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break; 734 case EOpVectorTimesScalar: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break; 735 case EOpMatrixTimesScalar: 736 if(visit == PostVisit) 737 { 738 if(left->isMatrix()) 739 { 740 for(int i = 0; i < leftType.getNominalSize(); i++) 741 { 742 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0); 743 } 744 } 745 else if(right->isMatrix()) 746 { 747 for(int i = 0; i < rightType.getNominalSize(); i++) 748 { 749 emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 750 } 751 } 752 else UNREACHABLE(0); 753 } 754 break; 755 case EOpVectorTimesMatrix: 756 if(visit == PostVisit) 757 { 758 sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize()); 759 760 int size = rightType.getNominalSize(); 761 for(int i = 0; i < size; i++) 762 { 763 Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i); 764 dot->dst.mask = 1 << i; 765 } 766 } 767 break; 768 case EOpMatrixTimesVector: 769 if(visit == PostVisit) 770 { 771 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right); 772 mul->src[1].swizzle = 0x00; 773 774 int size = rightType.getNominalSize(); 775 for(int i = 1; i < size; i++) 776 { 777 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result); 778 mad->src[1].swizzle = i * 0x55; 779 } 780 } 781 break; 782 case EOpMatrixTimesMatrix: 783 if(visit == PostVisit) 784 { 785 int dim = leftType.getNominalSize(); 786 787 int size = rightType.getNominalSize(); 788 for(int i = 0; i < size; i++) 789 { 790 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 791 mul->src[1].swizzle = 0x00; 792 793 for(int j = 1; j < dim; j++) 794 { 795 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 796 mad->src[1].swizzle = j * 0x55; 797 } 798 } 799 } 800 break; 801 case EOpLogicalOr: 802 if(trivial(right, 6)) 803 { 804 if(visit == PostVisit) 805 { 806 emit(sw::Shader::OPCODE_OR, result, left, right); 807 } 808 } 809 else // Short-circuit evaluation 810 { 811 if(visit == InVisit) 812 { 813 emit(sw::Shader::OPCODE_MOV, result, left); 814 Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result); 815 ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT; 816 } 817 else if(visit == PostVisit) 818 { 819 emit(sw::Shader::OPCODE_MOV, result, right); 820 emit(sw::Shader::OPCODE_ENDIF); 821 } 822 } 823 break; 824 case EOpLogicalXor: if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break; 825 case EOpLogicalAnd: 826 if(trivial(right, 6)) 827 { 828 if(visit == PostVisit) 829 { 830 emit(sw::Shader::OPCODE_AND, result, left, right); 831 } 832 } 833 else // Short-circuit evaluation 834 { 835 if(visit == InVisit) 836 { 837 emit(sw::Shader::OPCODE_MOV, result, left); 838 emit(sw::Shader::OPCODE_IF, 0, result); 839 } 840 else if(visit == PostVisit) 841 { 842 emit(sw::Shader::OPCODE_MOV, result, right); 843 emit(sw::Shader::OPCODE_ENDIF); 844 } 845 } 846 break; 847 default: UNREACHABLE(node->getOp()); 848 } 849 850 return true; 851 } 852 emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)853 void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow) 854 { 855 switch(size) 856 { 857 case 1: // Used for cofactor computation only 858 { 859 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 860 bool isMov = (row == col); 861 sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG; 862 Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row); 863 mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col); 864 mov->dst.mask = 1 << outRow; 865 } 866 break; 867 case 2: 868 { 869 static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy 870 871 bool isCofactor = (col >= 0) && (row >= 0); 872 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 873 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 874 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 875 876 Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1); 877 det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2]; 878 det->dst.mask = 1 << outRow; 879 } 880 break; 881 case 3: 882 { 883 static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw 884 885 bool isCofactor = (col >= 0) && (row >= 0); 886 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 887 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 888 int col2 = (isCofactor && (col <= 2)) ? 3 : 2; 889 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 890 891 Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2); 892 det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3]; 893 det->dst.mask = 1 << outRow; 894 } 895 break; 896 case 4: 897 { 898 Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3); 899 det->dst.mask = 1 << outRow; 900 } 901 break; 902 default: 903 UNREACHABLE(size); 904 break; 905 } 906 } 907 visitUnary(Visit visit,TIntermUnary * node)908 bool OutputASM::visitUnary(Visit visit, TIntermUnary *node) 909 { 910 if(currentScope != emitScope) 911 { 912 return false; 913 } 914 915 TIntermTyped *result = node; 916 TIntermTyped *arg = node->getOperand(); 917 TBasicType basicType = arg->getType().getBasicType(); 918 919 union 920 { 921 float f; 922 int i; 923 } one_value; 924 925 if(basicType == EbtInt || basicType == EbtUInt) 926 { 927 one_value.i = 1; 928 } 929 else 930 { 931 one_value.f = 1.0f; 932 } 933 934 Constant one(one_value.f, one_value.f, one_value.f, one_value.f); 935 Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f); 936 Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f); 937 938 switch(node->getOp()) 939 { 940 case EOpNegative: 941 if(visit == PostVisit) 942 { 943 sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg); 944 for(int index = 0; index < arg->totalRegisterCount(); index++) 945 { 946 emit(negOpcode, result, index, arg, index); 947 } 948 } 949 break; 950 case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 951 case EOpLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 952 case EOpPostIncrement: 953 if(visit == PostVisit) 954 { 955 copy(result, arg); 956 957 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 958 for(int index = 0; index < arg->totalRegisterCount(); index++) 959 { 960 emit(addOpcode, arg, index, arg, index, &one); 961 } 962 963 assignLvalue(arg, arg); 964 } 965 break; 966 case EOpPostDecrement: 967 if(visit == PostVisit) 968 { 969 copy(result, arg); 970 971 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 972 for(int index = 0; index < arg->totalRegisterCount(); index++) 973 { 974 emit(subOpcode, arg, index, arg, index, &one); 975 } 976 977 assignLvalue(arg, arg); 978 } 979 break; 980 case EOpPreIncrement: 981 if(visit == PostVisit) 982 { 983 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 984 for(int index = 0; index < arg->totalRegisterCount(); index++) 985 { 986 emit(addOpcode, result, index, arg, index, &one); 987 } 988 989 assignLvalue(arg, result); 990 } 991 break; 992 case EOpPreDecrement: 993 if(visit == PostVisit) 994 { 995 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 996 for(int index = 0; index < arg->totalRegisterCount(); index++) 997 { 998 emit(subOpcode, result, index, arg, index, &one); 999 } 1000 1001 assignLvalue(arg, result); 1002 } 1003 break; 1004 case EOpRadians: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break; 1005 case EOpDegrees: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, °); break; 1006 case EOpSin: if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break; 1007 case EOpCos: if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break; 1008 case EOpTan: if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break; 1009 case EOpAsin: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break; 1010 case EOpAcos: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break; 1011 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break; 1012 case EOpSinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break; 1013 case EOpCosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break; 1014 case EOpTanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break; 1015 case EOpAsinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break; 1016 case EOpAcosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break; 1017 case EOpAtanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break; 1018 case EOpExp: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break; 1019 case EOpLog: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break; 1020 case EOpExp2: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break; 1021 case EOpLog2: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break; 1022 case EOpSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break; 1023 case EOpInverseSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break; 1024 case EOpAbs: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break; 1025 case EOpSign: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break; 1026 case EOpFloor: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break; 1027 case EOpTrunc: if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break; 1028 case EOpRound: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break; 1029 case EOpRoundEven: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break; 1030 case EOpCeil: if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break; 1031 case EOpFract: if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break; 1032 case EOpIsNan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break; 1033 case EOpIsInf: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break; 1034 case EOpLength: if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break; 1035 case EOpNormalize: if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break; 1036 case EOpDFdx: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break; 1037 case EOpDFdy: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break; 1038 case EOpFwidth: if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break; 1039 case EOpAny: if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break; 1040 case EOpAll: if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break; 1041 case EOpFloatBitsToInt: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break; 1042 case EOpFloatBitsToUint: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break; 1043 case EOpIntBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break; 1044 case EOpUintBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break; 1045 case EOpPackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break; 1046 case EOpPackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break; 1047 case EOpPackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break; 1048 case EOpUnpackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break; 1049 case EOpUnpackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break; 1050 case EOpUnpackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break; 1051 case EOpTranspose: 1052 if(visit == PostVisit) 1053 { 1054 int numCols = arg->getNominalSize(); 1055 int numRows = arg->getSecondarySize(); 1056 for(int i = 0; i < numCols; ++i) 1057 { 1058 for(int j = 0; j < numRows; ++j) 1059 { 1060 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i); 1061 mov->src[0].swizzle = 0x55 * j; 1062 mov->dst.mask = 1 << i; 1063 } 1064 } 1065 } 1066 break; 1067 case EOpDeterminant: 1068 if(visit == PostVisit) 1069 { 1070 int size = arg->getNominalSize(); 1071 ASSERT(size == arg->getSecondarySize()); 1072 1073 emitDeterminant(result, arg, size); 1074 } 1075 break; 1076 case EOpInverse: 1077 if(visit == PostVisit) 1078 { 1079 int size = arg->getNominalSize(); 1080 ASSERT(size == arg->getSecondarySize()); 1081 1082 // Compute transposed matrix of cofactors 1083 for(int i = 0; i < size; ++i) 1084 { 1085 for(int j = 0; j < size; ++j) 1086 { 1087 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 1088 // For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant 1089 emitDeterminant(result, arg, size - 1, j, i, i, j); 1090 } 1091 } 1092 1093 // Compute 1 / determinant 1094 Temporary invDet(this); 1095 emitDeterminant(&invDet, arg, size); 1096 Constant one(1.0f, 1.0f, 1.0f, 1.0f); 1097 Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet); 1098 div->src[1].swizzle = 0x00; // xxxx 1099 1100 // Divide transposed matrix of cofactors by determinant 1101 for(int i = 0; i < size; ++i) 1102 { 1103 emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet); 1104 } 1105 } 1106 break; 1107 default: UNREACHABLE(node->getOp()); 1108 } 1109 1110 return true; 1111 } 1112 visitAggregate(Visit visit,TIntermAggregate * node)1113 bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node) 1114 { 1115 if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence) 1116 { 1117 return false; 1118 } 1119 1120 Constant zero(0.0f, 0.0f, 0.0f, 0.0f); 1121 1122 TIntermTyped *result = node; 1123 const TType &resultType = node->getType(); 1124 TIntermSequence &arg = node->getSequence(); 1125 size_t argumentCount = arg.size(); 1126 1127 switch(node->getOp()) 1128 { 1129 case EOpSequence: break; 1130 case EOpDeclaration: break; 1131 case EOpInvariantDeclaration: break; 1132 case EOpPrototype: break; 1133 case EOpComma: 1134 if(visit == PostVisit) 1135 { 1136 copy(result, arg[1]); 1137 } 1138 break; 1139 case EOpFunction: 1140 if(visit == PreVisit) 1141 { 1142 const TString &name = node->getName(); 1143 1144 if(emitScope == FUNCTION) 1145 { 1146 if(functionArray.size() > 1) // No need for a label when there's only main() 1147 { 1148 Instruction *label = emit(sw::Shader::OPCODE_LABEL); 1149 label->dst.type = sw::Shader::PARAMETER_LABEL; 1150 1151 const Function *function = findFunction(name); 1152 ASSERT(function); // Should have been added during global pass 1153 label->dst.index = function->label; 1154 currentFunction = function->label; 1155 } 1156 } 1157 else if(emitScope == GLOBAL) 1158 { 1159 if(name != "main(") 1160 { 1161 TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence(); 1162 functionArray.push_back(Function(functionArray.size(), name, &arguments, node)); 1163 } 1164 } 1165 else UNREACHABLE(emitScope); 1166 1167 currentScope = FUNCTION; 1168 } 1169 else if(visit == PostVisit) 1170 { 1171 if(emitScope == FUNCTION) 1172 { 1173 if(functionArray.size() > 1) // No need to return when there's only main() 1174 { 1175 emit(sw::Shader::OPCODE_RET); 1176 } 1177 } 1178 1179 currentScope = GLOBAL; 1180 } 1181 break; 1182 case EOpFunctionCall: 1183 if(visit == PostVisit) 1184 { 1185 if(node->isUserDefined()) 1186 { 1187 const TString &name = node->getName(); 1188 const Function *function = findFunction(name); 1189 1190 if(!function) 1191 { 1192 mContext.error(node->getLine(), "function definition not found", name.c_str()); 1193 return false; 1194 } 1195 1196 TIntermSequence &arguments = *function->arg; 1197 1198 for(size_t i = 0; i < argumentCount; i++) 1199 { 1200 TIntermTyped *in = arguments[i]->getAsTyped(); 1201 1202 if(in->getQualifier() == EvqIn || 1203 in->getQualifier() == EvqInOut || 1204 in->getQualifier() == EvqConstReadOnly) 1205 { 1206 copy(in, arg[i]); 1207 } 1208 } 1209 1210 Instruction *call = emit(sw::Shader::OPCODE_CALL); 1211 call->dst.type = sw::Shader::PARAMETER_LABEL; 1212 call->dst.index = function->label; 1213 1214 if(function->ret && function->ret->getType().getBasicType() != EbtVoid) 1215 { 1216 copy(result, function->ret); 1217 } 1218 1219 for(size_t i = 0; i < argumentCount; i++) 1220 { 1221 TIntermTyped *argument = arguments[i]->getAsTyped(); 1222 TIntermTyped *out = arg[i]->getAsTyped(); 1223 1224 if(argument->getQualifier() == EvqOut || 1225 argument->getQualifier() == EvqInOut) 1226 { 1227 copy(out, argument); 1228 } 1229 } 1230 } 1231 else 1232 { 1233 const TextureFunction textureFunction(node->getName()); 1234 TIntermTyped *t = arg[1]->getAsTyped(); 1235 1236 Temporary coord(this); 1237 1238 if(textureFunction.proj) 1239 { 1240 TIntermConstantUnion* constant = arg[1]->getAsConstantUnion(); 1241 if(constant) 1242 { 1243 float projFactor = 1.0f / constant->getFConst(t->getNominalSize() - 1); 1244 Constant projCoord(constant->getFConst(0) * projFactor, 1245 constant->getFConst(1) * projFactor, 1246 constant->getFConst(2) * projFactor, 1247 0.0f); 1248 emit(sw::Shader::OPCODE_MOV, &coord, &projCoord); 1249 } 1250 else 1251 { 1252 Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]); 1253 rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1); 1254 rcp->dst.mask = 0x7; 1255 1256 Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord); 1257 mul->dst.mask = 0x7; 1258 } 1259 } 1260 else 1261 { 1262 emit(sw::Shader::OPCODE_MOV, &coord, arg[1]); 1263 } 1264 1265 switch(textureFunction.method) 1266 { 1267 case TextureFunction::IMPLICIT: 1268 { 1269 TIntermNode* offset = textureFunction.offset ? arg[2] : 0; 1270 1271 if(argumentCount == 2 || (textureFunction.offset && argumentCount == 3)) 1272 { 1273 Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX, 1274 result, &coord, arg[0], offset); 1275 } 1276 else if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4)) // bias 1277 { 1278 Instruction *bias = emit(sw::Shader::OPCODE_MOV, &coord, arg[textureFunction.offset ? 3 : 2]); 1279 bias->dst.mask = 0x8; 1280 1281 Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX, 1282 result, &coord, arg[0], offset); // FIXME: Implement an efficient TEXLDB instruction 1283 tex->bias = true; 1284 } 1285 else UNREACHABLE(argumentCount); 1286 } 1287 break; 1288 case TextureFunction::LOD: 1289 { 1290 Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]); 1291 lod->dst.mask = 0x8; 1292 1293 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXLDLOFFSET : sw::Shader::OPCODE_TEXLDL, 1294 result, &coord, arg[0], textureFunction.offset ? arg[3] : nullptr); 1295 } 1296 break; 1297 case TextureFunction::FETCH: 1298 { 1299 if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4)) 1300 { 1301 TIntermNode *offset = textureFunction.offset ? arg[3] : nullptr; 1302 1303 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXELFETCHOFFSET : sw::Shader::OPCODE_TEXELFETCH, 1304 result, arg[1], arg[0], arg[2], offset); 1305 } 1306 else UNREACHABLE(argumentCount); 1307 } 1308 break; 1309 case TextureFunction::GRAD: 1310 { 1311 if(argumentCount == 4 || (textureFunction.offset && argumentCount == 5)) 1312 { 1313 TIntermNode *offset = textureFunction.offset ? arg[4] : nullptr; 1314 1315 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXGRADOFFSET : sw::Shader::OPCODE_TEXGRAD, 1316 result, &coord, arg[0], arg[2], arg[3], offset); 1317 } 1318 else UNREACHABLE(argumentCount); 1319 } 1320 break; 1321 case TextureFunction::SIZE: 1322 emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], arg[0]); 1323 break; 1324 default: 1325 UNREACHABLE(textureFunction.method); 1326 } 1327 } 1328 } 1329 break; 1330 case EOpParameters: 1331 break; 1332 case EOpConstructFloat: 1333 case EOpConstructVec2: 1334 case EOpConstructVec3: 1335 case EOpConstructVec4: 1336 case EOpConstructBool: 1337 case EOpConstructBVec2: 1338 case EOpConstructBVec3: 1339 case EOpConstructBVec4: 1340 case EOpConstructInt: 1341 case EOpConstructIVec2: 1342 case EOpConstructIVec3: 1343 case EOpConstructIVec4: 1344 case EOpConstructUInt: 1345 case EOpConstructUVec2: 1346 case EOpConstructUVec3: 1347 case EOpConstructUVec4: 1348 if(visit == PostVisit) 1349 { 1350 int component = 0; 1351 1352 for(size_t i = 0; i < argumentCount; i++) 1353 { 1354 TIntermTyped *argi = arg[i]->getAsTyped(); 1355 int size = argi->getNominalSize(); 1356 1357 if(!argi->isMatrix()) 1358 { 1359 Instruction *mov = emitCast(result, argi); 1360 mov->dst.mask = (0xF << component) & 0xF; 1361 mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2); 1362 1363 component += size; 1364 } 1365 else // Matrix 1366 { 1367 int column = 0; 1368 1369 while(component < resultType.getNominalSize()) 1370 { 1371 Instruction *mov = emitCast(result, 0, argi, column); 1372 mov->dst.mask = (0xF << component) & 0xF; 1373 mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2); 1374 1375 column++; 1376 component += size; 1377 } 1378 } 1379 } 1380 } 1381 break; 1382 case EOpConstructMat2: 1383 case EOpConstructMat2x3: 1384 case EOpConstructMat2x4: 1385 case EOpConstructMat3x2: 1386 case EOpConstructMat3: 1387 case EOpConstructMat3x4: 1388 case EOpConstructMat4x2: 1389 case EOpConstructMat4x3: 1390 case EOpConstructMat4: 1391 if(visit == PostVisit) 1392 { 1393 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1394 const int outCols = result->getNominalSize(); 1395 const int outRows = result->getSecondarySize(); 1396 1397 if(arg0->isScalar() && arg.size() == 1) // Construct scale matrix 1398 { 1399 for(int i = 0; i < outCols; i++) 1400 { 1401 Instruction *init = emit(sw::Shader::OPCODE_MOV, result, i, &zero); 1402 Instruction *mov = emitCast(result, i, arg0, 0); 1403 mov->dst.mask = 1 << i; 1404 ASSERT(mov->src[0].swizzle == 0x00); 1405 } 1406 } 1407 else if(arg0->isMatrix()) 1408 { 1409 const int inCols = arg0->getNominalSize(); 1410 const int inRows = arg0->getSecondarySize(); 1411 1412 for(int i = 0; i < outCols; i++) 1413 { 1414 if(i >= inCols || outRows > inRows) 1415 { 1416 // Initialize to identity matrix 1417 Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f)); 1418 Instruction *mov = emitCast(result, i, &col, 0); 1419 } 1420 1421 if(i < inCols) 1422 { 1423 Instruction *mov = emitCast(result, i, arg0, i); 1424 mov->dst.mask = 0xF >> (4 - inRows); 1425 } 1426 } 1427 } 1428 else 1429 { 1430 int column = 0; 1431 int row = 0; 1432 1433 for(size_t i = 0; i < argumentCount; i++) 1434 { 1435 TIntermTyped *argi = arg[i]->getAsTyped(); 1436 int size = argi->getNominalSize(); 1437 int element = 0; 1438 1439 while(element < size) 1440 { 1441 Instruction *mov = emitCast(result, column, argi, 0); 1442 mov->dst.mask = (0xF << row) & 0xF; 1443 mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element; 1444 1445 int end = row + size - element; 1446 column = end >= outRows ? column + 1 : column; 1447 element = element + outRows - row; 1448 row = end >= outRows ? 0 : end; 1449 } 1450 } 1451 } 1452 } 1453 break; 1454 case EOpConstructStruct: 1455 if(visit == PostVisit) 1456 { 1457 int offset = 0; 1458 for(size_t i = 0; i < argumentCount; i++) 1459 { 1460 TIntermTyped *argi = arg[i]->getAsTyped(); 1461 int size = argi->totalRegisterCount(); 1462 1463 for(int index = 0; index < size; index++) 1464 { 1465 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index); 1466 mov->dst.mask = writeMask(result, offset + index); 1467 } 1468 1469 offset += size; 1470 } 1471 } 1472 break; 1473 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break; 1474 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break; 1475 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break; 1476 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break; 1477 case EOpVectorEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break; 1478 case EOpVectorNotEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break; 1479 case EOpMod: if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break; 1480 case EOpModf: 1481 if(visit == PostVisit) 1482 { 1483 TIntermTyped* arg1 = arg[1]->getAsTyped(); 1484 emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]); 1485 assignLvalue(arg1, arg1); 1486 emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1); 1487 } 1488 break; 1489 case EOpPow: if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break; 1490 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break; 1491 case EOpMin: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break; 1492 case EOpMax: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break; 1493 case EOpClamp: 1494 if(visit == PostVisit) 1495 { 1496 emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); 1497 emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]); 1498 } 1499 break; 1500 case EOpMix: if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break; 1501 case EOpStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break; 1502 case EOpSmoothStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break; 1503 case EOpDistance: if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break; 1504 case EOpDot: if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break; 1505 case EOpCross: if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break; 1506 case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1507 case EOpReflect: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break; 1508 case EOpRefract: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1509 case EOpMul: 1510 if(visit == PostVisit) 1511 { 1512 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1513 TIntermTyped *arg1 = arg[1]->getAsTyped(); 1514 ASSERT((arg0->getNominalSize() == arg1->getNominalSize()) && (arg0->getSecondarySize() == arg1->getSecondarySize())); 1515 1516 int size = arg0->getNominalSize(); 1517 for(int i = 0; i < size; i++) 1518 { 1519 emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i); 1520 } 1521 } 1522 break; 1523 case EOpOuterProduct: 1524 if(visit == PostVisit) 1525 { 1526 for(int i = 0; i < dim(arg[1]); i++) 1527 { 1528 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]); 1529 mul->src[1].swizzle = 0x55 * i; 1530 } 1531 } 1532 break; 1533 default: UNREACHABLE(node->getOp()); 1534 } 1535 1536 return true; 1537 } 1538 visitSelection(Visit visit,TIntermSelection * node)1539 bool OutputASM::visitSelection(Visit visit, TIntermSelection *node) 1540 { 1541 if(currentScope != emitScope) 1542 { 1543 return false; 1544 } 1545 1546 TIntermTyped *condition = node->getCondition(); 1547 TIntermNode *trueBlock = node->getTrueBlock(); 1548 TIntermNode *falseBlock = node->getFalseBlock(); 1549 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 1550 1551 condition->traverse(this); 1552 1553 if(node->usesTernaryOperator()) 1554 { 1555 if(constantCondition) 1556 { 1557 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1558 1559 if(trueCondition) 1560 { 1561 trueBlock->traverse(this); 1562 copy(node, trueBlock); 1563 } 1564 else 1565 { 1566 falseBlock->traverse(this); 1567 copy(node, falseBlock); 1568 } 1569 } 1570 else if(trivial(node, 6)) // Fast to compute both potential results and no side effects 1571 { 1572 trueBlock->traverse(this); 1573 falseBlock->traverse(this); 1574 emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock); 1575 } 1576 else 1577 { 1578 emit(sw::Shader::OPCODE_IF, 0, condition); 1579 1580 if(trueBlock) 1581 { 1582 trueBlock->traverse(this); 1583 copy(node, trueBlock); 1584 } 1585 1586 if(falseBlock) 1587 { 1588 emit(sw::Shader::OPCODE_ELSE); 1589 falseBlock->traverse(this); 1590 copy(node, falseBlock); 1591 } 1592 1593 emit(sw::Shader::OPCODE_ENDIF); 1594 } 1595 } 1596 else // if/else statement 1597 { 1598 if(constantCondition) 1599 { 1600 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1601 1602 if(trueCondition) 1603 { 1604 if(trueBlock) 1605 { 1606 trueBlock->traverse(this); 1607 } 1608 } 1609 else 1610 { 1611 if(falseBlock) 1612 { 1613 falseBlock->traverse(this); 1614 } 1615 } 1616 } 1617 else 1618 { 1619 emit(sw::Shader::OPCODE_IF, 0, condition); 1620 1621 if(trueBlock) 1622 { 1623 trueBlock->traverse(this); 1624 } 1625 1626 if(falseBlock) 1627 { 1628 emit(sw::Shader::OPCODE_ELSE); 1629 falseBlock->traverse(this); 1630 } 1631 1632 emit(sw::Shader::OPCODE_ENDIF); 1633 } 1634 } 1635 1636 return false; 1637 } 1638 visitLoop(Visit visit,TIntermLoop * node)1639 bool OutputASM::visitLoop(Visit visit, TIntermLoop *node) 1640 { 1641 if(currentScope != emitScope) 1642 { 1643 return false; 1644 } 1645 1646 unsigned int iterations = loopCount(node); 1647 1648 if(iterations == 0) 1649 { 1650 return false; 1651 } 1652 1653 bool unroll = (iterations <= 4); 1654 1655 if(unroll) 1656 { 1657 LoopUnrollable loopUnrollable; 1658 unroll = loopUnrollable.traverse(node); 1659 } 1660 1661 TIntermNode *init = node->getInit(); 1662 TIntermTyped *condition = node->getCondition(); 1663 TIntermTyped *expression = node->getExpression(); 1664 TIntermNode *body = node->getBody(); 1665 Constant True(true); 1666 1667 if(node->getType() == ELoopDoWhile) 1668 { 1669 Temporary iterate(this); 1670 emit(sw::Shader::OPCODE_MOV, &iterate, &True); 1671 1672 emit(sw::Shader::OPCODE_WHILE, 0, &iterate); // FIXME: Implement real do-while 1673 1674 if(body) 1675 { 1676 body->traverse(this); 1677 } 1678 1679 emit(sw::Shader::OPCODE_TEST); 1680 1681 condition->traverse(this); 1682 emit(sw::Shader::OPCODE_MOV, &iterate, condition); 1683 1684 emit(sw::Shader::OPCODE_ENDWHILE); 1685 } 1686 else 1687 { 1688 if(init) 1689 { 1690 init->traverse(this); 1691 } 1692 1693 if(unroll) 1694 { 1695 for(unsigned int i = 0; i < iterations; i++) 1696 { 1697 // condition->traverse(this); // Condition could contain statements, but not in an unrollable loop 1698 1699 if(body) 1700 { 1701 body->traverse(this); 1702 } 1703 1704 if(expression) 1705 { 1706 expression->traverse(this); 1707 } 1708 } 1709 } 1710 else 1711 { 1712 if(condition) 1713 { 1714 condition->traverse(this); 1715 } 1716 else 1717 { 1718 condition = &True; 1719 } 1720 1721 emit(sw::Shader::OPCODE_WHILE, 0, condition); 1722 1723 if(body) 1724 { 1725 body->traverse(this); 1726 } 1727 1728 emit(sw::Shader::OPCODE_TEST); 1729 1730 if(expression) 1731 { 1732 expression->traverse(this); 1733 } 1734 1735 if(condition) 1736 { 1737 condition->traverse(this); 1738 } 1739 1740 emit(sw::Shader::OPCODE_ENDWHILE); 1741 } 1742 } 1743 1744 return false; 1745 } 1746 visitBranch(Visit visit,TIntermBranch * node)1747 bool OutputASM::visitBranch(Visit visit, TIntermBranch *node) 1748 { 1749 if(currentScope != emitScope) 1750 { 1751 return false; 1752 } 1753 1754 switch(node->getFlowOp()) 1755 { 1756 case EOpKill: if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD); break; 1757 case EOpBreak: if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK); break; 1758 case EOpContinue: if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break; 1759 case EOpReturn: 1760 if(visit == PostVisit) 1761 { 1762 TIntermTyped *value = node->getExpression(); 1763 1764 if(value) 1765 { 1766 copy(functionArray[currentFunction].ret, value); 1767 } 1768 1769 emit(sw::Shader::OPCODE_LEAVE); 1770 } 1771 break; 1772 default: UNREACHABLE(node->getFlowOp()); 1773 } 1774 1775 return true; 1776 } 1777 visitSwitch(Visit visit,TIntermSwitch * node)1778 bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node) 1779 { 1780 if(currentScope != emitScope) 1781 { 1782 return false; 1783 } 1784 1785 TIntermTyped* switchValue = node->getInit(); 1786 TIntermAggregate* opList = node->getStatementList(); 1787 1788 if(!switchValue || !opList) 1789 { 1790 return false; 1791 } 1792 1793 switchValue->traverse(this); 1794 1795 emit(sw::Shader::OPCODE_SWITCH); 1796 1797 TIntermSequence& sequence = opList->getSequence(); 1798 TIntermSequence::iterator it = sequence.begin(); 1799 TIntermSequence::iterator defaultIt = sequence.end(); 1800 int nbCases = 0; 1801 for(; it != sequence.end(); ++it) 1802 { 1803 TIntermCase* currentCase = (*it)->getAsCaseNode(); 1804 if(currentCase) 1805 { 1806 TIntermSequence::iterator caseIt = it; 1807 1808 TIntermTyped* condition = currentCase->getCondition(); 1809 if(condition) // non default case 1810 { 1811 if(nbCases != 0) 1812 { 1813 emit(sw::Shader::OPCODE_ELSE); 1814 } 1815 1816 condition->traverse(this); 1817 Temporary result(this); 1818 emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition); 1819 emit(sw::Shader::OPCODE_IF, 0, &result); 1820 nbCases++; 1821 1822 for(++caseIt; caseIt != sequence.end(); ++caseIt) 1823 { 1824 (*caseIt)->traverse(this); 1825 if((*caseIt)->getAsBranchNode()) // Kill, Break, Continue or Return 1826 { 1827 break; 1828 } 1829 } 1830 } 1831 else 1832 { 1833 defaultIt = it; // The default case might not be the last case, keep it for last 1834 } 1835 } 1836 } 1837 1838 // If there's a default case, traverse it here 1839 if(defaultIt != sequence.end()) 1840 { 1841 emit(sw::Shader::OPCODE_ELSE); 1842 for(++defaultIt; defaultIt != sequence.end(); ++defaultIt) 1843 { 1844 (*defaultIt)->traverse(this); 1845 if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return 1846 { 1847 break; 1848 } 1849 } 1850 } 1851 1852 for(int i = 0; i < nbCases; ++i) 1853 { 1854 emit(sw::Shader::OPCODE_ENDIF); 1855 } 1856 1857 emit(sw::Shader::OPCODE_ENDSWITCH); 1858 1859 return false; 1860 } 1861 emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)1862 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4) 1863 { 1864 return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0); 1865 } 1866 emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)1867 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1, 1868 TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4) 1869 { 1870 Instruction *instruction = new Instruction(op); 1871 1872 if(dst) 1873 { 1874 instruction->dst.type = registerType(dst); 1875 instruction->dst.index = registerIndex(dst) + dstIndex; 1876 instruction->dst.mask = writeMask(dst); 1877 instruction->dst.integer = (dst->getBasicType() == EbtInt); 1878 } 1879 1880 argument(instruction->src[0], src0, index0); 1881 argument(instruction->src[1], src1, index1); 1882 argument(instruction->src[2], src2, index2); 1883 argument(instruction->src[3], src3, index3); 1884 argument(instruction->src[4], src4, index4); 1885 1886 shader->append(instruction); 1887 1888 return instruction; 1889 } 1890 emitCast(TIntermTyped * dst,TIntermTyped * src)1891 Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src) 1892 { 1893 return emitCast(dst, 0, src, 0); 1894 } 1895 emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)1896 Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex) 1897 { 1898 switch(src->getBasicType()) 1899 { 1900 case EbtBool: 1901 switch(dst->getBasicType()) 1902 { 1903 case EbtInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 1904 case EbtUInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 1905 case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex); 1906 default: break; 1907 } 1908 break; 1909 case EbtInt: 1910 switch(dst->getBasicType()) 1911 { 1912 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 1913 case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex); 1914 default: break; 1915 } 1916 break; 1917 case EbtUInt: 1918 switch(dst->getBasicType()) 1919 { 1920 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 1921 case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex); 1922 default: break; 1923 } 1924 break; 1925 case EbtFloat: 1926 switch(dst->getBasicType()) 1927 { 1928 case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex); 1929 case EbtInt: return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex); 1930 case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex); 1931 default: break; 1932 } 1933 break; 1934 default: 1935 break; 1936 } 1937 1938 ASSERT((src->getBasicType() == dst->getBasicType()) || 1939 ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) || 1940 ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt))); 1941 1942 return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex); 1943 } 1944 emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)1945 void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2) 1946 { 1947 for(int index = 0; index < dst->elementRegisterCount(); index++) 1948 { 1949 emit(op, dst, index, src0, index, src1, index, src2, index); 1950 } 1951 } 1952 emitAssign(sw::Shader::Opcode op,TIntermTyped * result,TIntermTyped * lhs,TIntermTyped * src0,TIntermTyped * src1)1953 void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1) 1954 { 1955 emitBinary(op, result, src0, src1); 1956 assignLvalue(lhs, result); 1957 } 1958 emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)1959 void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index) 1960 { 1961 sw::Shader::Opcode opcode; 1962 switch(left->getAsTyped()->getBasicType()) 1963 { 1964 case EbtBool: 1965 case EbtInt: 1966 opcode = sw::Shader::OPCODE_ICMP; 1967 break; 1968 case EbtUInt: 1969 opcode = sw::Shader::OPCODE_UCMP; 1970 break; 1971 default: 1972 opcode = sw::Shader::OPCODE_CMP; 1973 break; 1974 } 1975 1976 Instruction *cmp = emit(opcode, dst, 0, left, index, right, index); 1977 cmp->control = cmpOp; 1978 } 1979 componentCount(const TType & type,int registers)1980 int componentCount(const TType &type, int registers) 1981 { 1982 if(registers == 0) 1983 { 1984 return 0; 1985 } 1986 1987 if(type.isArray() && registers >= type.elementRegisterCount()) 1988 { 1989 int index = registers / type.elementRegisterCount(); 1990 registers -= index * type.elementRegisterCount(); 1991 return index * type.getElementSize() + componentCount(type, registers); 1992 } 1993 1994 if(type.isStruct() || type.isInterfaceBlock()) 1995 { 1996 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 1997 int elements = 0; 1998 1999 for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++) 2000 { 2001 const TType &fieldType = *((*field)->type()); 2002 2003 if(fieldType.totalRegisterCount() <= registers) 2004 { 2005 registers -= fieldType.totalRegisterCount(); 2006 elements += fieldType.getObjectSize(); 2007 } 2008 else // Register within this field 2009 { 2010 return elements + componentCount(fieldType, registers); 2011 } 2012 } 2013 } 2014 else if(type.isMatrix()) 2015 { 2016 return registers * type.registerSize(); 2017 } 2018 2019 UNREACHABLE(0); 2020 return 0; 2021 } 2022 registerSize(const TType & type,int registers)2023 int registerSize(const TType &type, int registers) 2024 { 2025 if(registers == 0) 2026 { 2027 if(type.isStruct()) 2028 { 2029 return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0); 2030 } 2031 else if(type.isInterfaceBlock()) 2032 { 2033 return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0); 2034 } 2035 2036 return type.registerSize(); 2037 } 2038 2039 if(type.isArray() && registers >= type.elementRegisterCount()) 2040 { 2041 int index = registers / type.elementRegisterCount(); 2042 registers -= index * type.elementRegisterCount(); 2043 return registerSize(type, registers); 2044 } 2045 2046 if(type.isStruct() || type.isInterfaceBlock()) 2047 { 2048 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 2049 int elements = 0; 2050 2051 for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++) 2052 { 2053 const TType &fieldType = *((*field)->type()); 2054 2055 if(fieldType.totalRegisterCount() <= registers) 2056 { 2057 registers -= fieldType.totalRegisterCount(); 2058 elements += fieldType.getObjectSize(); 2059 } 2060 else // Register within this field 2061 { 2062 return registerSize(fieldType, registers); 2063 } 2064 } 2065 } 2066 else if(type.isMatrix()) 2067 { 2068 return registerSize(type, 0); 2069 } 2070 2071 UNREACHABLE(0); 2072 return 0; 2073 } 2074 getBlockId(TIntermTyped * arg)2075 int OutputASM::getBlockId(TIntermTyped *arg) 2076 { 2077 if(arg) 2078 { 2079 const TType &type = arg->getType(); 2080 TInterfaceBlock* block = type.getInterfaceBlock(); 2081 if(block && (type.getQualifier() == EvqUniform)) 2082 { 2083 // Make sure the uniform block is declared 2084 uniformRegister(arg); 2085 2086 const char* blockName = block->name().c_str(); 2087 2088 // Fetch uniform block index from array of blocks 2089 for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it) 2090 { 2091 if(blockName == it->name) 2092 { 2093 return it->blockId; 2094 } 2095 } 2096 2097 ASSERT(false); 2098 } 2099 } 2100 2101 return -1; 2102 } 2103 getArgumentInfo(TIntermTyped * arg,int index)2104 OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index) 2105 { 2106 const TType &type = arg->getType(); 2107 int blockId = getBlockId(arg); 2108 ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1); 2109 if(blockId != -1) 2110 { 2111 argumentInfo.bufferIndex = 0; 2112 for(int i = 0; i < blockId; ++i) 2113 { 2114 int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize; 2115 argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1; 2116 } 2117 2118 const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId]; 2119 2120 BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end(); 2121 BlockDefinitionIndexMap::const_iterator it = itEnd; 2122 2123 argumentInfo.clampedIndex = index; 2124 if(type.isInterfaceBlock()) 2125 { 2126 // Offset index to the beginning of the selected instance 2127 int blockRegisters = type.elementRegisterCount(); 2128 int bufferOffset = argumentInfo.clampedIndex / blockRegisters; 2129 argumentInfo.bufferIndex += bufferOffset; 2130 argumentInfo.clampedIndex -= bufferOffset * blockRegisters; 2131 } 2132 2133 int regIndex = registerIndex(arg); 2134 for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i) 2135 { 2136 it = blockDefinition.find(i); 2137 if(it != itEnd) 2138 { 2139 argumentInfo.clampedIndex -= (i - regIndex); 2140 break; 2141 } 2142 } 2143 ASSERT(it != itEnd); 2144 2145 argumentInfo.typedMemberInfo = it->second; 2146 2147 int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount(); 2148 argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex; 2149 } 2150 else 2151 { 2152 argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index; 2153 } 2154 2155 return argumentInfo; 2156 } 2157 argument(sw::Shader::SourceParameter & parameter,TIntermNode * argument,int index)2158 void OutputASM::argument(sw::Shader::SourceParameter ¶meter, TIntermNode *argument, int index) 2159 { 2160 if(argument) 2161 { 2162 TIntermTyped *arg = argument->getAsTyped(); 2163 Temporary unpackedUniform(this); 2164 2165 const TType& srcType = arg->getType(); 2166 TInterfaceBlock* srcBlock = srcType.getInterfaceBlock(); 2167 if(srcBlock && (srcType.getQualifier() == EvqUniform)) 2168 { 2169 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2170 const TType &memberType = argumentInfo.typedMemberInfo.type; 2171 2172 if(memberType.getBasicType() == EbtBool) 2173 { 2174 int arraySize = (memberType.isArray() ? memberType.getArraySize() : 1); 2175 ASSERT(argumentInfo.clampedIndex < arraySize); 2176 2177 // Convert the packed bool, which is currently an int, to a true bool 2178 Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B); 2179 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2180 instruction->dst.index = registerIndex(&unpackedUniform); 2181 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2182 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2183 instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride; 2184 2185 shader->append(instruction); 2186 2187 arg = &unpackedUniform; 2188 index = 0; 2189 } 2190 else if((srcBlock->matrixPacking() == EmpRowMajor) && memberType.isMatrix()) 2191 { 2192 int numCols = memberType.getNominalSize(); 2193 int numRows = memberType.getSecondarySize(); 2194 int arraySize = (memberType.isArray() ? memberType.getArraySize() : 1); 2195 2196 ASSERT(argumentInfo.clampedIndex < (numCols * arraySize)); 2197 2198 unsigned int dstIndex = registerIndex(&unpackedUniform); 2199 unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55; 2200 int arrayIndex = argumentInfo.clampedIndex / numCols; 2201 int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride; 2202 2203 for(int j = 0; j < numRows; ++j) 2204 { 2205 // Transpose the row major matrix 2206 Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV); 2207 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2208 instruction->dst.index = dstIndex; 2209 instruction->dst.mask = 1 << j; 2210 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2211 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2212 instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride; 2213 instruction->src[0].swizzle = srcSwizzle; 2214 2215 shader->append(instruction); 2216 } 2217 2218 arg = &unpackedUniform; 2219 index = 0; 2220 } 2221 } 2222 2223 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2224 const TType &type = argumentInfo.typedMemberInfo.type; 2225 2226 int size = registerSize(type, argumentInfo.clampedIndex); 2227 2228 parameter.type = registerType(arg); 2229 parameter.bufferIndex = argumentInfo.bufferIndex; 2230 2231 if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer()) 2232 { 2233 int component = componentCount(type, argumentInfo.clampedIndex); 2234 ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer(); 2235 2236 for(int i = 0; i < 4; i++) 2237 { 2238 if(size == 1) // Replicate 2239 { 2240 parameter.value[i] = constants[component + 0].getAsFloat(); 2241 } 2242 else if(i < size) 2243 { 2244 parameter.value[i] = constants[component + i].getAsFloat(); 2245 } 2246 else 2247 { 2248 parameter.value[i] = 0.0f; 2249 } 2250 } 2251 } 2252 else 2253 { 2254 parameter.index = registerIndex(arg) + argumentInfo.clampedIndex; 2255 2256 if(parameter.bufferIndex != -1) 2257 { 2258 int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride; 2259 parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride; 2260 } 2261 } 2262 2263 if(!IsSampler(arg->getBasicType())) 2264 { 2265 parameter.swizzle = readSwizzle(arg, size); 2266 } 2267 } 2268 } 2269 copy(TIntermTyped * dst,TIntermNode * src,int offset)2270 void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset) 2271 { 2272 for(int index = 0; index < dst->totalRegisterCount(); index++) 2273 { 2274 Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index); 2275 mov->dst.mask = writeMask(dst, index); 2276 } 2277 } 2278 swizzleElement(int swizzle,int index)2279 int swizzleElement(int swizzle, int index) 2280 { 2281 return (swizzle >> (index * 2)) & 0x03; 2282 } 2283 swizzleSwizzle(int leftSwizzle,int rightSwizzle)2284 int swizzleSwizzle(int leftSwizzle, int rightSwizzle) 2285 { 2286 return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) | 2287 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) | 2288 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) | 2289 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6); 2290 } 2291 assignLvalue(TIntermTyped * dst,TIntermTyped * src)2292 void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src) 2293 { 2294 if(src && 2295 ((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) || 2296 (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize()))))) 2297 { 2298 return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix"); 2299 } 2300 2301 TIntermBinary *binary = dst->getAsBinaryNode(); 2302 2303 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar()) 2304 { 2305 Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT); 2306 2307 Temporary address(this); 2308 lvalue(insert->dst, address, dst); 2309 2310 insert->src[0].type = insert->dst.type; 2311 insert->src[0].index = insert->dst.index; 2312 insert->src[0].rel = insert->dst.rel; 2313 argument(insert->src[1], src); 2314 argument(insert->src[2], binary->getRight()); 2315 2316 shader->append(insert); 2317 } 2318 else 2319 { 2320 for(int offset = 0; offset < dst->totalRegisterCount(); offset++) 2321 { 2322 Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV); 2323 2324 Temporary address(this); 2325 int swizzle = lvalue(mov->dst, address, dst); 2326 mov->dst.index += offset; 2327 2328 if(offset > 0) 2329 { 2330 mov->dst.mask = writeMask(dst, offset); 2331 } 2332 2333 argument(mov->src[0], src, offset); 2334 mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle); 2335 2336 shader->append(mov); 2337 } 2338 } 2339 } 2340 lvalue(sw::Shader::DestinationParameter & dst,Temporary & address,TIntermTyped * node)2341 int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node) 2342 { 2343 TIntermTyped *result = node; 2344 TIntermBinary *binary = node->getAsBinaryNode(); 2345 TIntermSymbol *symbol = node->getAsSymbolNode(); 2346 2347 if(binary) 2348 { 2349 TIntermTyped *left = binary->getLeft(); 2350 TIntermTyped *right = binary->getRight(); 2351 2352 int leftSwizzle = lvalue(dst, address, left); // Resolve the l-value of the left side 2353 2354 switch(binary->getOp()) 2355 { 2356 case EOpIndexDirect: 2357 { 2358 int rightIndex = right->getAsConstantUnion()->getIConst(0); 2359 2360 if(left->isRegister()) 2361 { 2362 int leftMask = dst.mask; 2363 2364 dst.mask = 1; 2365 while((leftMask & dst.mask) == 0) 2366 { 2367 dst.mask = dst.mask << 1; 2368 } 2369 2370 int element = swizzleElement(leftSwizzle, rightIndex); 2371 dst.mask = 1 << element; 2372 2373 return element; 2374 } 2375 else if(left->isArray() || left->isMatrix()) 2376 { 2377 dst.index += rightIndex * result->totalRegisterCount(); 2378 return 0xE4; 2379 } 2380 else UNREACHABLE(0); 2381 } 2382 break; 2383 case EOpIndexIndirect: 2384 { 2385 if(left->isRegister()) 2386 { 2387 // Requires INSERT instruction (handled by calling function) 2388 } 2389 else if(left->isArray() || left->isMatrix()) 2390 { 2391 int scale = result->totalRegisterCount(); 2392 2393 if(dst.rel.type == sw::Shader::PARAMETER_VOID) // Use the index register as the relative address directly 2394 { 2395 if(left->totalRegisterCount() > 1) 2396 { 2397 sw::Shader::SourceParameter relativeRegister; 2398 argument(relativeRegister, right); 2399 2400 dst.rel.index = relativeRegister.index; 2401 dst.rel.type = relativeRegister.type; 2402 dst.rel.scale = scale; 2403 dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform); 2404 } 2405 } 2406 else if(dst.rel.index != registerIndex(&address)) // Move the previous index register to the address register 2407 { 2408 if(scale == 1) 2409 { 2410 Constant oldScale((int)dst.rel.scale); 2411 Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right); 2412 mad->src[0].index = dst.rel.index; 2413 mad->src[0].type = dst.rel.type; 2414 } 2415 else 2416 { 2417 Constant oldScale((int)dst.rel.scale); 2418 Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale); 2419 mul->src[0].index = dst.rel.index; 2420 mul->src[0].type = dst.rel.type; 2421 2422 Constant newScale(scale); 2423 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2424 } 2425 2426 dst.rel.type = sw::Shader::PARAMETER_TEMP; 2427 dst.rel.index = registerIndex(&address); 2428 dst.rel.scale = 1; 2429 } 2430 else // Just add the new index to the address register 2431 { 2432 if(scale == 1) 2433 { 2434 emit(sw::Shader::OPCODE_IADD, &address, &address, right); 2435 } 2436 else 2437 { 2438 Constant newScale(scale); 2439 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2440 } 2441 } 2442 } 2443 else UNREACHABLE(0); 2444 } 2445 break; 2446 case EOpIndexDirectStruct: 2447 case EOpIndexDirectInterfaceBlock: 2448 { 2449 const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ? 2450 left->getType().getStruct()->fields() : 2451 left->getType().getInterfaceBlock()->fields(); 2452 int index = right->getAsConstantUnion()->getIConst(0); 2453 int fieldOffset = 0; 2454 2455 for(int i = 0; i < index; i++) 2456 { 2457 fieldOffset += fields[i]->type()->totalRegisterCount(); 2458 } 2459 2460 dst.type = registerType(left); 2461 dst.index += fieldOffset; 2462 dst.mask = writeMask(right); 2463 2464 return 0xE4; 2465 } 2466 break; 2467 case EOpVectorSwizzle: 2468 { 2469 ASSERT(left->isRegister()); 2470 2471 int leftMask = dst.mask; 2472 2473 int swizzle = 0; 2474 int rightMask = 0; 2475 2476 TIntermSequence &sequence = right->getAsAggregate()->getSequence(); 2477 2478 for(unsigned int i = 0; i < sequence.size(); i++) 2479 { 2480 int index = sequence[i]->getAsConstantUnion()->getIConst(0); 2481 2482 int element = swizzleElement(leftSwizzle, index); 2483 rightMask = rightMask | (1 << element); 2484 swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2); 2485 } 2486 2487 dst.mask = leftMask & rightMask; 2488 2489 return swizzle; 2490 } 2491 break; 2492 default: 2493 UNREACHABLE(binary->getOp()); // Not an l-value operator 2494 break; 2495 } 2496 } 2497 else if(symbol) 2498 { 2499 dst.type = registerType(symbol); 2500 dst.index = registerIndex(symbol); 2501 dst.mask = writeMask(symbol); 2502 return 0xE4; 2503 } 2504 2505 return 0xE4; 2506 } 2507 registerType(TIntermTyped * operand)2508 sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand) 2509 { 2510 if(isSamplerRegister(operand)) 2511 { 2512 return sw::Shader::PARAMETER_SAMPLER; 2513 } 2514 2515 const TQualifier qualifier = operand->getQualifier(); 2516 if((EvqFragColor == qualifier) || (EvqFragData == qualifier)) 2517 { 2518 if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) || 2519 ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier))) 2520 { 2521 mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", ""); 2522 } 2523 outputQualifier = qualifier; 2524 } 2525 2526 if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer())) 2527 { 2528 return sw::Shader::PARAMETER_TEMP; 2529 } 2530 2531 switch(qualifier) 2532 { 2533 case EvqTemporary: return sw::Shader::PARAMETER_TEMP; 2534 case EvqGlobal: return sw::Shader::PARAMETER_TEMP; 2535 case EvqConstExpr: return sw::Shader::PARAMETER_FLOAT4LITERAL; // All converted to float 2536 case EvqAttribute: return sw::Shader::PARAMETER_INPUT; 2537 case EvqVaryingIn: return sw::Shader::PARAMETER_INPUT; 2538 case EvqVaryingOut: return sw::Shader::PARAMETER_OUTPUT; 2539 case EvqVertexIn: return sw::Shader::PARAMETER_INPUT; 2540 case EvqFragmentOut: return sw::Shader::PARAMETER_COLOROUT; 2541 case EvqVertexOut: return sw::Shader::PARAMETER_OUTPUT; 2542 case EvqFragmentIn: return sw::Shader::PARAMETER_INPUT; 2543 case EvqInvariantVaryingIn: return sw::Shader::PARAMETER_INPUT; // FIXME: Guarantee invariance at the backend 2544 case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT; // FIXME: Guarantee invariance at the backend 2545 case EvqSmooth: return sw::Shader::PARAMETER_OUTPUT; 2546 case EvqFlat: return sw::Shader::PARAMETER_OUTPUT; 2547 case EvqCentroidOut: return sw::Shader::PARAMETER_OUTPUT; 2548 case EvqSmoothIn: return sw::Shader::PARAMETER_INPUT; 2549 case EvqFlatIn: return sw::Shader::PARAMETER_INPUT; 2550 case EvqCentroidIn: return sw::Shader::PARAMETER_INPUT; 2551 case EvqUniform: return sw::Shader::PARAMETER_CONST; 2552 case EvqIn: return sw::Shader::PARAMETER_TEMP; 2553 case EvqOut: return sw::Shader::PARAMETER_TEMP; 2554 case EvqInOut: return sw::Shader::PARAMETER_TEMP; 2555 case EvqConstReadOnly: return sw::Shader::PARAMETER_TEMP; 2556 case EvqPosition: return sw::Shader::PARAMETER_OUTPUT; 2557 case EvqPointSize: return sw::Shader::PARAMETER_OUTPUT; 2558 case EvqInstanceID: return sw::Shader::PARAMETER_MISCTYPE; 2559 case EvqFragCoord: return sw::Shader::PARAMETER_MISCTYPE; 2560 case EvqFrontFacing: return sw::Shader::PARAMETER_MISCTYPE; 2561 case EvqPointCoord: return sw::Shader::PARAMETER_INPUT; 2562 case EvqFragColor: return sw::Shader::PARAMETER_COLOROUT; 2563 case EvqFragData: return sw::Shader::PARAMETER_COLOROUT; 2564 case EvqFragDepth: return sw::Shader::PARAMETER_DEPTHOUT; 2565 default: UNREACHABLE(qualifier); 2566 } 2567 2568 return sw::Shader::PARAMETER_VOID; 2569 } 2570 registerIndex(TIntermTyped * operand)2571 unsigned int OutputASM::registerIndex(TIntermTyped *operand) 2572 { 2573 if(isSamplerRegister(operand)) 2574 { 2575 return samplerRegister(operand); 2576 } 2577 2578 switch(operand->getQualifier()) 2579 { 2580 case EvqTemporary: return temporaryRegister(operand); 2581 case EvqGlobal: return temporaryRegister(operand); 2582 case EvqConstExpr: return temporaryRegister(operand); // Unevaluated constant expression 2583 case EvqAttribute: return attributeRegister(operand); 2584 case EvqVaryingIn: return varyingRegister(operand); 2585 case EvqVaryingOut: return varyingRegister(operand); 2586 case EvqVertexIn: return attributeRegister(operand); 2587 case EvqFragmentOut: return fragmentOutputRegister(operand); 2588 case EvqVertexOut: return varyingRegister(operand); 2589 case EvqFragmentIn: return varyingRegister(operand); 2590 case EvqInvariantVaryingIn: return varyingRegister(operand); 2591 case EvqInvariantVaryingOut: return varyingRegister(operand); 2592 case EvqSmooth: return varyingRegister(operand); 2593 case EvqFlat: return varyingRegister(operand); 2594 case EvqCentroidOut: return varyingRegister(operand); 2595 case EvqSmoothIn: return varyingRegister(operand); 2596 case EvqFlatIn: return varyingRegister(operand); 2597 case EvqCentroidIn: return varyingRegister(operand); 2598 case EvqUniform: return uniformRegister(operand); 2599 case EvqIn: return temporaryRegister(operand); 2600 case EvqOut: return temporaryRegister(operand); 2601 case EvqInOut: return temporaryRegister(operand); 2602 case EvqConstReadOnly: return temporaryRegister(operand); 2603 case EvqPosition: return varyingRegister(operand); 2604 case EvqPointSize: return varyingRegister(operand); 2605 case EvqInstanceID: vertexShader->instanceIdDeclared = true; return 0; 2606 case EvqFragCoord: pixelShader->vPosDeclared = true; return 0; 2607 case EvqFrontFacing: pixelShader->vFaceDeclared = true; return 1; 2608 case EvqPointCoord: return varyingRegister(operand); 2609 case EvqFragColor: return 0; 2610 case EvqFragData: return fragmentOutputRegister(operand); 2611 case EvqFragDepth: return 0; 2612 default: UNREACHABLE(operand->getQualifier()); 2613 } 2614 2615 return 0; 2616 } 2617 writeMask(TIntermTyped * destination,int index)2618 int OutputASM::writeMask(TIntermTyped *destination, int index) 2619 { 2620 if(destination->getQualifier() == EvqPointSize) 2621 { 2622 return 0x2; // Point size stored in the y component 2623 } 2624 2625 return 0xF >> (4 - registerSize(destination->getType(), index)); 2626 } 2627 readSwizzle(TIntermTyped * argument,int size)2628 int OutputASM::readSwizzle(TIntermTyped *argument, int size) 2629 { 2630 if(argument->getQualifier() == EvqPointSize) 2631 { 2632 return 0x55; // Point size stored in the y component 2633 } 2634 2635 static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4}; // (void), xxxx, xyyy, xyzz, xyzw 2636 2637 return swizzleSize[size]; 2638 } 2639 2640 // Conservatively checks whether an expression is fast to compute and has no side effects trivial(TIntermTyped * expression,int budget)2641 bool OutputASM::trivial(TIntermTyped *expression, int budget) 2642 { 2643 if(!expression->isRegister()) 2644 { 2645 return false; 2646 } 2647 2648 return cost(expression, budget) >= 0; 2649 } 2650 2651 // Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects) cost(TIntermNode * expression,int budget)2652 int OutputASM::cost(TIntermNode *expression, int budget) 2653 { 2654 if(budget < 0) 2655 { 2656 return budget; 2657 } 2658 2659 if(expression->getAsSymbolNode()) 2660 { 2661 return budget; 2662 } 2663 else if(expression->getAsConstantUnion()) 2664 { 2665 return budget; 2666 } 2667 else if(expression->getAsBinaryNode()) 2668 { 2669 TIntermBinary *binary = expression->getAsBinaryNode(); 2670 2671 switch(binary->getOp()) 2672 { 2673 case EOpVectorSwizzle: 2674 case EOpIndexDirect: 2675 case EOpIndexDirectStruct: 2676 case EOpIndexDirectInterfaceBlock: 2677 return cost(binary->getLeft(), budget - 0); 2678 case EOpAdd: 2679 case EOpSub: 2680 case EOpMul: 2681 return cost(binary->getLeft(), cost(binary->getRight(), budget - 1)); 2682 default: 2683 return -1; 2684 } 2685 } 2686 else if(expression->getAsUnaryNode()) 2687 { 2688 TIntermUnary *unary = expression->getAsUnaryNode(); 2689 2690 switch(unary->getOp()) 2691 { 2692 case EOpAbs: 2693 case EOpNegative: 2694 return cost(unary->getOperand(), budget - 1); 2695 default: 2696 return -1; 2697 } 2698 } 2699 else if(expression->getAsSelectionNode()) 2700 { 2701 TIntermSelection *selection = expression->getAsSelectionNode(); 2702 2703 if(selection->usesTernaryOperator()) 2704 { 2705 TIntermTyped *condition = selection->getCondition(); 2706 TIntermNode *trueBlock = selection->getTrueBlock(); 2707 TIntermNode *falseBlock = selection->getFalseBlock(); 2708 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 2709 2710 if(constantCondition) 2711 { 2712 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 2713 2714 if(trueCondition) 2715 { 2716 return cost(trueBlock, budget - 0); 2717 } 2718 else 2719 { 2720 return cost(falseBlock, budget - 0); 2721 } 2722 } 2723 else 2724 { 2725 return cost(trueBlock, cost(falseBlock, budget - 2)); 2726 } 2727 } 2728 } 2729 2730 return -1; 2731 } 2732 findFunction(const TString & name)2733 const Function *OutputASM::findFunction(const TString &name) 2734 { 2735 for(unsigned int f = 0; f < functionArray.size(); f++) 2736 { 2737 if(functionArray[f].name == name) 2738 { 2739 return &functionArray[f]; 2740 } 2741 } 2742 2743 return 0; 2744 } 2745 temporaryRegister(TIntermTyped * temporary)2746 int OutputASM::temporaryRegister(TIntermTyped *temporary) 2747 { 2748 return allocate(temporaries, temporary); 2749 } 2750 varyingRegister(TIntermTyped * varying)2751 int OutputASM::varyingRegister(TIntermTyped *varying) 2752 { 2753 int var = lookup(varyings, varying); 2754 2755 if(var == -1) 2756 { 2757 var = allocate(varyings, varying); 2758 int componentCount = varying->registerSize(); 2759 int registerCount = varying->totalRegisterCount(); 2760 2761 if(pixelShader) 2762 { 2763 if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS) 2764 { 2765 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader"); 2766 return 0; 2767 } 2768 2769 if(varying->getQualifier() == EvqPointCoord) 2770 { 2771 ASSERT(varying->isRegister()); 2772 if(componentCount >= 1) pixelShader->semantic[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var); 2773 if(componentCount >= 2) pixelShader->semantic[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var); 2774 if(componentCount >= 3) pixelShader->semantic[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var); 2775 if(componentCount >= 4) pixelShader->semantic[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var); 2776 } 2777 else 2778 { 2779 for(int i = 0; i < varying->totalRegisterCount(); i++) 2780 { 2781 if(componentCount >= 1) pixelShader->semantic[var + i][0] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i); 2782 if(componentCount >= 2) pixelShader->semantic[var + i][1] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i); 2783 if(componentCount >= 3) pixelShader->semantic[var + i][2] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i); 2784 if(componentCount >= 4) pixelShader->semantic[var + i][3] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i); 2785 } 2786 } 2787 } 2788 else if(vertexShader) 2789 { 2790 if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS) 2791 { 2792 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader"); 2793 return 0; 2794 } 2795 2796 if(varying->getQualifier() == EvqPosition) 2797 { 2798 ASSERT(varying->isRegister()); 2799 vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0); 2800 vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0); 2801 vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0); 2802 vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0); 2803 vertexShader->positionRegister = var; 2804 } 2805 else if(varying->getQualifier() == EvqPointSize) 2806 { 2807 ASSERT(varying->isRegister()); 2808 vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0); 2809 vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0); 2810 vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0); 2811 vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0); 2812 vertexShader->pointSizeRegister = var; 2813 } 2814 else 2815 { 2816 // Semantic indexes for user varyings will be assigned during program link to match the pixel shader 2817 } 2818 } 2819 else UNREACHABLE(0); 2820 2821 declareVarying(varying, var); 2822 } 2823 2824 return var; 2825 } 2826 declareVarying(TIntermTyped * varying,int reg)2827 void OutputASM::declareVarying(TIntermTyped *varying, int reg) 2828 { 2829 if(varying->getQualifier() != EvqPointCoord) // gl_PointCoord does not need linking 2830 { 2831 const TType &type = varying->getType(); 2832 const char *name = varying->getAsSymbolNode()->getSymbol().c_str(); 2833 VaryingList &activeVaryings = shaderObject->varyings; 2834 2835 // Check if this varying has been declared before without having a register assigned 2836 for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++) 2837 { 2838 if(v->name == name) 2839 { 2840 if(reg >= 0) 2841 { 2842 ASSERT(v->reg < 0 || v->reg == reg); 2843 v->reg = reg; 2844 } 2845 2846 return; 2847 } 2848 } 2849 2850 activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0)); 2851 } 2852 } 2853 uniformRegister(TIntermTyped * uniform)2854 int OutputASM::uniformRegister(TIntermTyped *uniform) 2855 { 2856 const TType &type = uniform->getType(); 2857 ASSERT(!IsSampler(type.getBasicType())); 2858 TInterfaceBlock *block = type.getAsInterfaceBlock(); 2859 TIntermSymbol *symbol = uniform->getAsSymbolNode(); 2860 ASSERT(symbol || block); 2861 2862 if(symbol || block) 2863 { 2864 TInterfaceBlock* parentBlock = type.getInterfaceBlock(); 2865 bool isBlockMember = (!block && parentBlock); 2866 int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform); 2867 2868 if(index == -1 || isBlockMember) 2869 { 2870 if(index == -1) 2871 { 2872 index = allocate(uniforms, uniform); 2873 } 2874 2875 // Verify if the current uniform is a member of an already declared block 2876 const TString &name = symbol ? symbol->getSymbol() : block->name(); 2877 int blockMemberIndex = blockMemberLookup(type, name, index); 2878 if(blockMemberIndex == -1) 2879 { 2880 declareUniform(type, name, index); 2881 } 2882 else 2883 { 2884 index = blockMemberIndex; 2885 } 2886 } 2887 2888 return index; 2889 } 2890 2891 return 0; 2892 } 2893 attributeRegister(TIntermTyped * attribute)2894 int OutputASM::attributeRegister(TIntermTyped *attribute) 2895 { 2896 ASSERT(!attribute->isArray()); 2897 2898 int index = lookup(attributes, attribute); 2899 2900 if(index == -1) 2901 { 2902 TIntermSymbol *symbol = attribute->getAsSymbolNode(); 2903 ASSERT(symbol); 2904 2905 if(symbol) 2906 { 2907 index = allocate(attributes, attribute); 2908 const TType &type = attribute->getType(); 2909 int registerCount = attribute->totalRegisterCount(); 2910 2911 if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS) 2912 { 2913 for(int i = 0; i < registerCount; i++) 2914 { 2915 vertexShader->input[index + i] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i); 2916 } 2917 } 2918 2919 ActiveAttributes &activeAttributes = shaderObject->activeAttributes; 2920 2921 const char *name = symbol->getSymbol().c_str(); 2922 activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index)); 2923 } 2924 } 2925 2926 return index; 2927 } 2928 fragmentOutputRegister(TIntermTyped * fragmentOutput)2929 int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput) 2930 { 2931 return allocate(fragmentOutputs, fragmentOutput); 2932 } 2933 samplerRegister(TIntermTyped * sampler)2934 int OutputASM::samplerRegister(TIntermTyped *sampler) 2935 { 2936 const TType &type = sampler->getType(); 2937 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 2938 2939 TIntermSymbol *symbol = sampler->getAsSymbolNode(); 2940 TIntermBinary *binary = sampler->getAsBinaryNode(); 2941 2942 if(symbol && type.getQualifier() == EvqUniform) 2943 { 2944 return samplerRegister(symbol); 2945 } 2946 else if(binary) 2947 { 2948 TIntermTyped *left = binary->getLeft(); 2949 TIntermTyped *right = binary->getRight(); 2950 const TType &leftType = left->getType(); 2951 int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0; 2952 int offset = 0; 2953 2954 switch(binary->getOp()) 2955 { 2956 case EOpIndexDirect: 2957 ASSERT(left->isArray()); 2958 offset = index * leftType.elementRegisterCount(); 2959 break; 2960 case EOpIndexDirectStruct: 2961 ASSERT(leftType.isStruct()); 2962 { 2963 const TFieldList &fields = leftType.getStruct()->fields(); 2964 2965 for(int i = 0; i < index; i++) 2966 { 2967 offset += fields[i]->type()->totalRegisterCount(); 2968 } 2969 } 2970 break; 2971 case EOpIndexIndirect: // Indirect indexing produces a temporary, not a sampler register 2972 return -1; 2973 case EOpIndexDirectInterfaceBlock: // Interface blocks can't contain samplers 2974 default: 2975 UNREACHABLE(binary->getOp()); 2976 return -1; 2977 } 2978 2979 int base = samplerRegister(left); 2980 2981 if(base < 0) 2982 { 2983 return -1; 2984 } 2985 2986 return base + offset; 2987 } 2988 2989 UNREACHABLE(0); 2990 return -1; // Not a sampler register 2991 } 2992 samplerRegister(TIntermSymbol * sampler)2993 int OutputASM::samplerRegister(TIntermSymbol *sampler) 2994 { 2995 const TType &type = sampler->getType(); 2996 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 2997 2998 int index = lookup(samplers, sampler); 2999 3000 if(index == -1) 3001 { 3002 index = allocate(samplers, sampler); 3003 3004 if(sampler->getQualifier() == EvqUniform) 3005 { 3006 const char *name = sampler->getSymbol().c_str(); 3007 declareUniform(type, name, index); 3008 } 3009 } 3010 3011 return index; 3012 } 3013 isSamplerRegister(TIntermTyped * operand)3014 bool OutputASM::isSamplerRegister(TIntermTyped *operand) 3015 { 3016 return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0; 3017 } 3018 lookup(VariableArray & list,TIntermTyped * variable)3019 int OutputASM::lookup(VariableArray &list, TIntermTyped *variable) 3020 { 3021 for(unsigned int i = 0; i < list.size(); i++) 3022 { 3023 if(list[i] == variable) 3024 { 3025 return i; // Pointer match 3026 } 3027 } 3028 3029 TIntermSymbol *varSymbol = variable->getAsSymbolNode(); 3030 TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock(); 3031 3032 if(varBlock) 3033 { 3034 for(unsigned int i = 0; i < list.size(); i++) 3035 { 3036 if(list[i]) 3037 { 3038 TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock(); 3039 3040 if(listBlock) 3041 { 3042 if(listBlock->name() == varBlock->name()) 3043 { 3044 ASSERT(listBlock->arraySize() == varBlock->arraySize()); 3045 ASSERT(listBlock->fields() == varBlock->fields()); 3046 ASSERT(listBlock->blockStorage() == varBlock->blockStorage()); 3047 ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking()); 3048 3049 return i; 3050 } 3051 } 3052 } 3053 } 3054 } 3055 else if(varSymbol) 3056 { 3057 for(unsigned int i = 0; i < list.size(); i++) 3058 { 3059 if(list[i]) 3060 { 3061 TIntermSymbol *listSymbol = list[i]->getAsSymbolNode(); 3062 3063 if(listSymbol) 3064 { 3065 if(listSymbol->getId() == varSymbol->getId()) 3066 { 3067 ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol()); 3068 ASSERT(listSymbol->getType() == varSymbol->getType()); 3069 ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier()); 3070 3071 return i; 3072 } 3073 } 3074 } 3075 } 3076 } 3077 3078 return -1; 3079 } 3080 lookup(VariableArray & list,TInterfaceBlock * block)3081 int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block) 3082 { 3083 for(unsigned int i = 0; i < list.size(); i++) 3084 { 3085 if(list[i] && (list[i]->getType().getInterfaceBlock() == block)) 3086 { 3087 return i; // Pointer match 3088 } 3089 } 3090 return -1; 3091 } 3092 allocate(VariableArray & list,TIntermTyped * variable)3093 int OutputASM::allocate(VariableArray &list, TIntermTyped *variable) 3094 { 3095 int index = lookup(list, variable); 3096 3097 if(index == -1) 3098 { 3099 unsigned int registerCount = variable->blockRegisterCount(); 3100 3101 for(unsigned int i = 0; i < list.size(); i++) 3102 { 3103 if(list[i] == 0) 3104 { 3105 unsigned int j = 1; 3106 for( ; j < registerCount && (i + j) < list.size(); j++) 3107 { 3108 if(list[i + j] != 0) 3109 { 3110 break; 3111 } 3112 } 3113 3114 if(j == registerCount) // Found free slots 3115 { 3116 for(unsigned int j = 0; j < registerCount; j++) 3117 { 3118 list[i + j] = variable; 3119 } 3120 3121 return i; 3122 } 3123 } 3124 } 3125 3126 index = list.size(); 3127 3128 for(unsigned int i = 0; i < registerCount; i++) 3129 { 3130 list.push_back(variable); 3131 } 3132 } 3133 3134 return index; 3135 } 3136 free(VariableArray & list,TIntermTyped * variable)3137 void OutputASM::free(VariableArray &list, TIntermTyped *variable) 3138 { 3139 int index = lookup(list, variable); 3140 3141 if(index >= 0) 3142 { 3143 list[index] = 0; 3144 } 3145 } 3146 blockMemberLookup(const TType & type,const TString & name,int registerIndex)3147 int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex) 3148 { 3149 const TInterfaceBlock *block = type.getInterfaceBlock(); 3150 3151 if(block) 3152 { 3153 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3154 const TFieldList& fields = block->fields(); 3155 const TString &blockName = block->name(); 3156 int fieldRegisterIndex = registerIndex; 3157 3158 if(!type.isInterfaceBlock()) 3159 { 3160 // This is a uniform that's part of a block, let's see if the block is already defined 3161 for(size_t i = 0; i < activeUniformBlocks.size(); ++i) 3162 { 3163 if(activeUniformBlocks[i].name == blockName.c_str()) 3164 { 3165 // The block is already defined, find the register for the current uniform and return it 3166 for(size_t j = 0; j < fields.size(); j++) 3167 { 3168 const TString &fieldName = fields[j]->name(); 3169 if(fieldName == name) 3170 { 3171 return fieldRegisterIndex; 3172 } 3173 3174 fieldRegisterIndex += fields[j]->type()->totalRegisterCount(); 3175 } 3176 3177 ASSERT(false); 3178 return fieldRegisterIndex; 3179 } 3180 } 3181 } 3182 } 3183 3184 return -1; 3185 } 3186 declareUniform(const TType & type,const TString & name,int registerIndex,int blockId,BlockLayoutEncoder * encoder)3187 void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int blockId, BlockLayoutEncoder* encoder) 3188 { 3189 const TStructure *structure = type.getStruct(); 3190 const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr; 3191 3192 if(!structure && !block) 3193 { 3194 ActiveUniforms &activeUniforms = shaderObject->activeUniforms; 3195 const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo(); 3196 if(blockId >= 0) 3197 { 3198 blockDefinitions[blockId][registerIndex] = TypedMemberInfo(blockInfo, type); 3199 shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size()); 3200 } 3201 int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex; 3202 activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(), 3203 fieldRegisterIndex, blockId, blockInfo)); 3204 if(IsSampler(type.getBasicType())) 3205 { 3206 for(int i = 0; i < type.totalRegisterCount(); i++) 3207 { 3208 shader->declareSampler(fieldRegisterIndex + i); 3209 } 3210 } 3211 } 3212 else if(block) 3213 { 3214 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3215 const TFieldList& fields = block->fields(); 3216 const TString &blockName = block->name(); 3217 int fieldRegisterIndex = registerIndex; 3218 bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1); 3219 3220 blockId = activeUniformBlocks.size(); 3221 bool isRowMajor = block->matrixPacking() == EmpRowMajor; 3222 activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(), 3223 block->blockStorage(), isRowMajor, registerIndex, blockId)); 3224 blockDefinitions.push_back(BlockDefinitionIndexMap()); 3225 3226 Std140BlockEncoder currentBlockEncoder(isRowMajor); 3227 currentBlockEncoder.enterAggregateType(); 3228 for(size_t i = 0; i < fields.size(); i++) 3229 { 3230 const TType &fieldType = *(fields[i]->type()); 3231 const TString &fieldName = fields[i]->name(); 3232 if(isUniformBlockMember && (fieldName == name)) 3233 { 3234 registerIndex = fieldRegisterIndex; 3235 } 3236 3237 const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName; 3238 3239 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, ¤tBlockEncoder); 3240 fieldRegisterIndex += fieldType.totalRegisterCount(); 3241 } 3242 currentBlockEncoder.exitAggregateType(); 3243 activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize(); 3244 } 3245 else 3246 { 3247 int fieldRegisterIndex = registerIndex; 3248 3249 const TFieldList& fields = structure->fields(); 3250 if(type.isArray() && (structure || type.isInterfaceBlock())) 3251 { 3252 for(int i = 0; i < type.getArraySize(); i++) 3253 { 3254 if(encoder) 3255 { 3256 encoder->enterAggregateType(); 3257 } 3258 for(size_t j = 0; j < fields.size(); j++) 3259 { 3260 const TType &fieldType = *(fields[j]->type()); 3261 const TString &fieldName = fields[j]->name(); 3262 const TString uniformName = name + "[" + str(i) + "]." + fieldName; 3263 3264 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder); 3265 fieldRegisterIndex += fieldType.totalRegisterCount(); 3266 } 3267 if(encoder) 3268 { 3269 encoder->exitAggregateType(); 3270 } 3271 } 3272 } 3273 else 3274 { 3275 if(encoder) 3276 { 3277 encoder->enterAggregateType(); 3278 } 3279 for(size_t i = 0; i < fields.size(); i++) 3280 { 3281 const TType &fieldType = *(fields[i]->type()); 3282 const TString &fieldName = fields[i]->name(); 3283 const TString uniformName = name + "." + fieldName; 3284 3285 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder); 3286 fieldRegisterIndex += fieldType.totalRegisterCount(); 3287 } 3288 if(encoder) 3289 { 3290 encoder->exitAggregateType(); 3291 } 3292 } 3293 } 3294 } 3295 glVariableType(const TType & type)3296 GLenum OutputASM::glVariableType(const TType &type) 3297 { 3298 switch(type.getBasicType()) 3299 { 3300 case EbtFloat: 3301 if(type.isScalar()) 3302 { 3303 return GL_FLOAT; 3304 } 3305 else if(type.isVector()) 3306 { 3307 switch(type.getNominalSize()) 3308 { 3309 case 2: return GL_FLOAT_VEC2; 3310 case 3: return GL_FLOAT_VEC3; 3311 case 4: return GL_FLOAT_VEC4; 3312 default: UNREACHABLE(type.getNominalSize()); 3313 } 3314 } 3315 else if(type.isMatrix()) 3316 { 3317 switch(type.getNominalSize()) 3318 { 3319 case 2: 3320 switch(type.getSecondarySize()) 3321 { 3322 case 2: return GL_FLOAT_MAT2; 3323 case 3: return GL_FLOAT_MAT2x3; 3324 case 4: return GL_FLOAT_MAT2x4; 3325 default: UNREACHABLE(type.getSecondarySize()); 3326 } 3327 case 3: 3328 switch(type.getSecondarySize()) 3329 { 3330 case 2: return GL_FLOAT_MAT3x2; 3331 case 3: return GL_FLOAT_MAT3; 3332 case 4: return GL_FLOAT_MAT3x4; 3333 default: UNREACHABLE(type.getSecondarySize()); 3334 } 3335 case 4: 3336 switch(type.getSecondarySize()) 3337 { 3338 case 2: return GL_FLOAT_MAT4x2; 3339 case 3: return GL_FLOAT_MAT4x3; 3340 case 4: return GL_FLOAT_MAT4; 3341 default: UNREACHABLE(type.getSecondarySize()); 3342 } 3343 default: UNREACHABLE(type.getNominalSize()); 3344 } 3345 } 3346 else UNREACHABLE(0); 3347 break; 3348 case EbtInt: 3349 if(type.isScalar()) 3350 { 3351 return GL_INT; 3352 } 3353 else if(type.isVector()) 3354 { 3355 switch(type.getNominalSize()) 3356 { 3357 case 2: return GL_INT_VEC2; 3358 case 3: return GL_INT_VEC3; 3359 case 4: return GL_INT_VEC4; 3360 default: UNREACHABLE(type.getNominalSize()); 3361 } 3362 } 3363 else UNREACHABLE(0); 3364 break; 3365 case EbtUInt: 3366 if(type.isScalar()) 3367 { 3368 return GL_UNSIGNED_INT; 3369 } 3370 else if(type.isVector()) 3371 { 3372 switch(type.getNominalSize()) 3373 { 3374 case 2: return GL_UNSIGNED_INT_VEC2; 3375 case 3: return GL_UNSIGNED_INT_VEC3; 3376 case 4: return GL_UNSIGNED_INT_VEC4; 3377 default: UNREACHABLE(type.getNominalSize()); 3378 } 3379 } 3380 else UNREACHABLE(0); 3381 break; 3382 case EbtBool: 3383 if(type.isScalar()) 3384 { 3385 return GL_BOOL; 3386 } 3387 else if(type.isVector()) 3388 { 3389 switch(type.getNominalSize()) 3390 { 3391 case 2: return GL_BOOL_VEC2; 3392 case 3: return GL_BOOL_VEC3; 3393 case 4: return GL_BOOL_VEC4; 3394 default: UNREACHABLE(type.getNominalSize()); 3395 } 3396 } 3397 else UNREACHABLE(0); 3398 break; 3399 case EbtSampler2D: 3400 return GL_SAMPLER_2D; 3401 case EbtISampler2D: 3402 return GL_INT_SAMPLER_2D; 3403 case EbtUSampler2D: 3404 return GL_UNSIGNED_INT_SAMPLER_2D; 3405 case EbtSamplerCube: 3406 return GL_SAMPLER_CUBE; 3407 case EbtISamplerCube: 3408 return GL_INT_SAMPLER_CUBE; 3409 case EbtUSamplerCube: 3410 return GL_UNSIGNED_INT_SAMPLER_CUBE; 3411 case EbtSamplerExternalOES: 3412 return GL_SAMPLER_EXTERNAL_OES; 3413 case EbtSampler3D: 3414 return GL_SAMPLER_3D_OES; 3415 case EbtISampler3D: 3416 return GL_INT_SAMPLER_3D; 3417 case EbtUSampler3D: 3418 return GL_UNSIGNED_INT_SAMPLER_3D; 3419 case EbtSampler2DArray: 3420 return GL_SAMPLER_2D_ARRAY; 3421 case EbtISampler2DArray: 3422 return GL_INT_SAMPLER_2D_ARRAY; 3423 case EbtUSampler2DArray: 3424 return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY; 3425 case EbtSampler2DShadow: 3426 return GL_SAMPLER_2D_SHADOW; 3427 case EbtSamplerCubeShadow: 3428 return GL_SAMPLER_CUBE_SHADOW; 3429 case EbtSampler2DArrayShadow: 3430 return GL_SAMPLER_2D_ARRAY_SHADOW; 3431 default: 3432 UNREACHABLE(type.getBasicType()); 3433 break; 3434 } 3435 3436 return GL_NONE; 3437 } 3438 glVariablePrecision(const TType & type)3439 GLenum OutputASM::glVariablePrecision(const TType &type) 3440 { 3441 if(type.getBasicType() == EbtFloat) 3442 { 3443 switch(type.getPrecision()) 3444 { 3445 case EbpHigh: return GL_HIGH_FLOAT; 3446 case EbpMedium: return GL_MEDIUM_FLOAT; 3447 case EbpLow: return GL_LOW_FLOAT; 3448 case EbpUndefined: 3449 // Should be defined as the default precision by the parser 3450 default: UNREACHABLE(type.getPrecision()); 3451 } 3452 } 3453 else if(type.getBasicType() == EbtInt) 3454 { 3455 switch(type.getPrecision()) 3456 { 3457 case EbpHigh: return GL_HIGH_INT; 3458 case EbpMedium: return GL_MEDIUM_INT; 3459 case EbpLow: return GL_LOW_INT; 3460 case EbpUndefined: 3461 // Should be defined as the default precision by the parser 3462 default: UNREACHABLE(type.getPrecision()); 3463 } 3464 } 3465 3466 // Other types (boolean, sampler) don't have a precision 3467 return GL_NONE; 3468 } 3469 dim(TIntermNode * v)3470 int OutputASM::dim(TIntermNode *v) 3471 { 3472 TIntermTyped *vector = v->getAsTyped(); 3473 ASSERT(vector && vector->isRegister()); 3474 return vector->getNominalSize(); 3475 } 3476 dim2(TIntermNode * m)3477 int OutputASM::dim2(TIntermNode *m) 3478 { 3479 TIntermTyped *matrix = m->getAsTyped(); 3480 ASSERT(matrix && matrix->isMatrix() && !matrix->isArray()); 3481 return matrix->getSecondarySize(); 3482 } 3483 3484 // Returns ~0u if no loop count could be determined loopCount(TIntermLoop * node)3485 unsigned int OutputASM::loopCount(TIntermLoop *node) 3486 { 3487 // Parse loops of the form: 3488 // for(int index = initial; index [comparator] limit; index += increment) 3489 TIntermSymbol *index = 0; 3490 TOperator comparator = EOpNull; 3491 int initial = 0; 3492 int limit = 0; 3493 int increment = 0; 3494 3495 // Parse index name and intial value 3496 if(node->getInit()) 3497 { 3498 TIntermAggregate *init = node->getInit()->getAsAggregate(); 3499 3500 if(init) 3501 { 3502 TIntermSequence &sequence = init->getSequence(); 3503 TIntermTyped *variable = sequence[0]->getAsTyped(); 3504 3505 if(variable && variable->getQualifier() == EvqTemporary) 3506 { 3507 TIntermBinary *assign = variable->getAsBinaryNode(); 3508 3509 if(assign->getOp() == EOpInitialize) 3510 { 3511 TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode(); 3512 TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion(); 3513 3514 if(symbol && constant) 3515 { 3516 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3517 { 3518 index = symbol; 3519 initial = constant->getUnionArrayPointer()[0].getIConst(); 3520 } 3521 } 3522 } 3523 } 3524 } 3525 } 3526 3527 // Parse comparator and limit value 3528 if(index && node->getCondition()) 3529 { 3530 TIntermBinary *test = node->getCondition()->getAsBinaryNode(); 3531 TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr; 3532 3533 if(left && (left->getId() == index->getId())) 3534 { 3535 TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion(); 3536 3537 if(constant) 3538 { 3539 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3540 { 3541 comparator = test->getOp(); 3542 limit = constant->getUnionArrayPointer()[0].getIConst(); 3543 } 3544 } 3545 } 3546 } 3547 3548 // Parse increment 3549 if(index && comparator != EOpNull && node->getExpression()) 3550 { 3551 TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode(); 3552 TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode(); 3553 3554 if(binaryTerminal) 3555 { 3556 TOperator op = binaryTerminal->getOp(); 3557 TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion(); 3558 3559 if(constant) 3560 { 3561 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3562 { 3563 int value = constant->getUnionArrayPointer()[0].getIConst(); 3564 3565 switch(op) 3566 { 3567 case EOpAddAssign: increment = value; break; 3568 case EOpSubAssign: increment = -value; break; 3569 default: UNIMPLEMENTED(); 3570 } 3571 } 3572 } 3573 } 3574 else if(unaryTerminal) 3575 { 3576 TOperator op = unaryTerminal->getOp(); 3577 3578 switch(op) 3579 { 3580 case EOpPostIncrement: increment = 1; break; 3581 case EOpPostDecrement: increment = -1; break; 3582 case EOpPreIncrement: increment = 1; break; 3583 case EOpPreDecrement: increment = -1; break; 3584 default: UNIMPLEMENTED(); 3585 } 3586 } 3587 } 3588 3589 if(index && comparator != EOpNull && increment != 0) 3590 { 3591 if(comparator == EOpLessThanEqual) 3592 { 3593 comparator = EOpLessThan; 3594 limit += 1; 3595 } 3596 3597 if(comparator == EOpLessThan) 3598 { 3599 int iterations = (limit - initial) / increment; 3600 3601 if(iterations <= 0) 3602 { 3603 iterations = 0; 3604 } 3605 3606 return iterations; 3607 } 3608 else UNIMPLEMENTED(); // Falls through 3609 } 3610 3611 return ~0u; 3612 } 3613 traverse(TIntermNode * node)3614 bool LoopUnrollable::traverse(TIntermNode *node) 3615 { 3616 loopDepth = 0; 3617 loopUnrollable = true; 3618 3619 node->traverse(this); 3620 3621 return loopUnrollable; 3622 } 3623 visitLoop(Visit visit,TIntermLoop * loop)3624 bool LoopUnrollable::visitLoop(Visit visit, TIntermLoop *loop) 3625 { 3626 if(visit == PreVisit) 3627 { 3628 loopDepth++; 3629 } 3630 else if(visit == PostVisit) 3631 { 3632 loopDepth++; 3633 } 3634 3635 return true; 3636 } 3637 visitBranch(Visit visit,TIntermBranch * node)3638 bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node) 3639 { 3640 if(!loopUnrollable) 3641 { 3642 return false; 3643 } 3644 3645 if(!loopDepth) 3646 { 3647 return true; 3648 } 3649 3650 switch(node->getFlowOp()) 3651 { 3652 case EOpKill: 3653 case EOpReturn: 3654 break; 3655 case EOpBreak: 3656 case EOpContinue: 3657 loopUnrollable = false; 3658 break; 3659 default: UNREACHABLE(node->getFlowOp()); 3660 } 3661 3662 return loopUnrollable; 3663 } 3664 visitAggregate(Visit visit,TIntermAggregate * node)3665 bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node) 3666 { 3667 return loopUnrollable; 3668 } 3669 } 3670