1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Renderer.hpp" 16 17 #include "Clipper.hpp" 18 #include "Surface.hpp" 19 #include "Primitive.hpp" 20 #include "Polygon.hpp" 21 #include "WSI/FrameBuffer.hpp" 22 #include "Device/SwiftConfig.hpp" 23 #include "Reactor/Reactor.hpp" 24 #include "Pipeline/Constants.hpp" 25 #include "System/MutexLock.hpp" 26 #include "System/CPUID.hpp" 27 #include "System/Memory.hpp" 28 #include "System/Resource.hpp" 29 #include "System/Half.hpp" 30 #include "System/Math.hpp" 31 #include "System/Timer.hpp" 32 #include "Vulkan/VkDebug.hpp" 33 34 #undef max 35 36 bool disableServer = true; 37 38 #ifndef NDEBUG 39 unsigned int minPrimitives = 1; 40 unsigned int maxPrimitives = 1 << 21; 41 #endif 42 43 namespace sw 44 { 45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 46 extern bool booleanFaceRegister; 47 extern bool fullPixelPositionRegister; 48 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last 49 extern bool secondaryColor; // Specular lighting is applied after texturing 50 extern bool colorsDefaultToZero; 51 52 extern bool forceWindowed; 53 extern bool complementaryDepthBuffer; 54 extern bool postBlendSRGB; 55 extern bool exactColorRounding; 56 extern TransparencyAntialiasing transparencyAntialiasing; 57 extern bool forceClearRegisters; 58 59 extern bool precacheVertex; 60 extern bool precacheSetup; 61 extern bool precachePixel; 62 63 static const int batchSize = 128; 64 AtomicInt threadCount(1); 65 AtomicInt Renderer::unitCount(1); 66 AtomicInt Renderer::clusterCount(1); 67 68 TranscendentalPrecision logPrecision = ACCURATE; 69 TranscendentalPrecision expPrecision = ACCURATE; 70 TranscendentalPrecision rcpPrecision = ACCURATE; 71 TranscendentalPrecision rsqPrecision = ACCURATE; 72 bool perspectiveCorrection = true; 73 setGlobalRenderingSettings(Conventions conventions,bool exactColorRounding)74 static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding) 75 { 76 static bool initialized = false; 77 78 if(!initialized) 79 { 80 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates; 81 sw::booleanFaceRegister = conventions.booleanFaceRegister; 82 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister; 83 sw::leadingVertexFirst = conventions.leadingVertexFirst; 84 sw::secondaryColor = conventions.secondaryColor; 85 sw::colorsDefaultToZero = conventions.colorsDefaultToZero; 86 sw::exactColorRounding = exactColorRounding; 87 initialized = true; 88 } 89 } 90 91 struct Parameters 92 { 93 Renderer *renderer; 94 int threadIndex; 95 }; 96 DrawCall()97 DrawCall::DrawCall() 98 { 99 queries = 0; 100 101 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 102 vsDirtyConstI = 16; 103 vsDirtyConstB = 16; 104 105 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS; 106 psDirtyConstI = 16; 107 psDirtyConstB = 16; 108 109 references = -1; 110 111 data = (DrawData*)allocate(sizeof(DrawData)); 112 data->constants = &constants; 113 } 114 ~DrawCall()115 DrawCall::~DrawCall() 116 { 117 delete queries; 118 119 deallocate(data); 120 } 121 Renderer(Context * context,Conventions conventions,bool exactColorRounding)122 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport() 123 { 124 setGlobalRenderingSettings(conventions, exactColorRounding); 125 126 setRenderTarget(0, nullptr); 127 clipper = new Clipper; 128 blitter = new Blitter; 129 130 updateClipPlanes = true; 131 132 #if PERF_HUD 133 resetTimers(); 134 #endif 135 136 for(int i = 0; i < 16; i++) 137 { 138 vertexTask[i] = nullptr; 139 140 worker[i] = nullptr; 141 resume[i] = nullptr; 142 suspend[i] = nullptr; 143 } 144 145 threadsAwake = 0; 146 resumeApp = new Event(); 147 148 currentDraw = 0; 149 nextDraw = 0; 150 151 qHead = 0; 152 qSize = 0; 153 154 for(int i = 0; i < 16; i++) 155 { 156 triangleBatch[i] = nullptr; 157 primitiveBatch[i] = nullptr; 158 } 159 160 for(int draw = 0; draw < DRAW_COUNT; draw++) 161 { 162 drawCall[draw] = new DrawCall(); 163 drawList[draw] = drawCall[draw]; 164 } 165 166 for(int unit = 0; unit < 16; unit++) 167 { 168 primitiveProgress[unit].init(); 169 } 170 171 for(int cluster = 0; cluster < 16; cluster++) 172 { 173 pixelProgress[cluster].init(); 174 } 175 176 clipFlags = 0; 177 178 swiftConfig = new SwiftConfig(disableServer); 179 updateConfiguration(true); 180 181 sync = new Resource(0); 182 } 183 ~Renderer()184 Renderer::~Renderer() 185 { 186 sync->destruct(); 187 188 delete clipper; 189 clipper = nullptr; 190 191 delete blitter; 192 blitter = nullptr; 193 194 terminateThreads(); 195 delete resumeApp; 196 197 for(int draw = 0; draw < DRAW_COUNT; draw++) 198 { 199 delete drawCall[draw]; 200 } 201 202 delete swiftConfig; 203 } 204 205 // This object has to be mem aligned operator new(size_t size)206 void* Renderer::operator new(size_t size) 207 { 208 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class 209 return sw::allocate(sizeof(Renderer), 16); 210 } 211 operator delete(void * mem)212 void Renderer::operator delete(void * mem) 213 { 214 sw::deallocate(mem); 215 } 216 draw(DrawType drawType,unsigned int indexOffset,unsigned int count,bool update)217 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update) 218 { 219 #ifndef NDEBUG 220 if(count < minPrimitives || count > maxPrimitives) 221 { 222 return; 223 } 224 #endif 225 226 context->drawType = drawType; 227 228 updateConfiguration(); 229 updateClipper(); 230 231 int ms = context->getMultiSampleCount(); 232 unsigned int oldMultiSampleMask = context->multiSampleMask; 233 context->multiSampleMask = context->sampleMask & ((unsigned)0xFFFFFFFF >> (32 - ms)); 234 235 if(!context->multiSampleMask) 236 { 237 return; 238 } 239 240 sync->lock(sw::PRIVATE); 241 242 if(update || oldMultiSampleMask != context->multiSampleMask) 243 { 244 vertexState = VertexProcessor::update(drawType); 245 setupState = SetupProcessor::update(); 246 pixelState = PixelProcessor::update(); 247 248 vertexRoutine = VertexProcessor::routine(vertexState); 249 setupRoutine = SetupProcessor::routine(setupState); 250 pixelRoutine = PixelProcessor::routine(pixelState); 251 } 252 253 int batch = batchSize / ms; 254 255 int (Renderer::*setupPrimitives)(int batch, int count); 256 257 if(context->isDrawTriangle()) 258 { 259 setupPrimitives = &Renderer::setupTriangles; 260 } 261 else if(context->isDrawLine()) 262 { 263 setupPrimitives = &Renderer::setupLines; 264 } 265 else // Point draw 266 { 267 setupPrimitives = &Renderer::setupPoints; 268 } 269 270 DrawCall *draw = nullptr; 271 272 do 273 { 274 for(int i = 0; i < DRAW_COUNT; i++) 275 { 276 if(drawCall[i]->references == -1) 277 { 278 draw = drawCall[i]; 279 drawList[nextDraw & DRAW_COUNT_BITS] = draw; 280 281 break; 282 } 283 } 284 285 if(!draw) 286 { 287 resumeApp->wait(); 288 } 289 } 290 while(!draw); 291 292 DrawData *data = draw->data; 293 294 if(queries.size() != 0) 295 { 296 draw->queries = new std::list<Query*>(); 297 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled; 298 for(auto &query : queries) 299 { 300 if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN)) 301 { 302 ++query->reference; // Atomic 303 draw->queries->push_back(query); 304 } 305 } 306 } 307 308 draw->drawType = drawType; 309 draw->batchSize = batch; 310 311 vertexRoutine->bind(); 312 setupRoutine->bind(); 313 pixelRoutine->bind(); 314 315 draw->vertexRoutine = vertexRoutine; 316 draw->setupRoutine = setupRoutine; 317 draw->pixelRoutine = pixelRoutine; 318 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry(); 319 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry(); 320 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry(); 321 draw->setupPrimitives = setupPrimitives; 322 draw->setupState = setupState; 323 324 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 325 { 326 draw->vertexStream[i] = context->input[i].resource; 327 data->input[i] = context->input[i].buffer; 328 data->stride[i] = context->input[i].stride; 329 330 if(draw->vertexStream[i]) 331 { 332 draw->vertexStream[i]->lock(PUBLIC, PRIVATE); 333 } 334 } 335 336 if(context->indexBuffer) 337 { 338 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset; 339 } 340 341 draw->indexBuffer = context->indexBuffer; 342 343 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++) 344 { 345 draw->texture[sampler] = 0; 346 } 347 348 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++) 349 { 350 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL) 351 { 352 draw->texture[sampler] = context->texture[sampler]; 353 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets 354 355 data->mipmap[sampler] = context->sampler[sampler].getTextureData(); 356 } 357 } 358 359 if(context->pixelShader) 360 { 361 if(draw->psDirtyConstF) 362 { 363 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF); 364 draw->psDirtyConstF = 0; 365 } 366 367 if(draw->psDirtyConstI) 368 { 369 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI); 370 draw->psDirtyConstI = 0; 371 } 372 373 if(draw->psDirtyConstB) 374 { 375 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB); 376 draw->psDirtyConstB = 0; 377 } 378 379 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers); 380 } 381 else 382 { 383 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 384 { 385 draw->pUniformBuffers[i] = nullptr; 386 } 387 } 388 389 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++) 390 { 391 if(vertexState.sampler[sampler].textureType != TEXTURE_NULL) 392 { 393 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler]; 394 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE); 395 396 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData(); 397 } 398 } 399 400 if(draw->vsDirtyConstF) 401 { 402 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF); 403 draw->vsDirtyConstF = 0; 404 } 405 406 if(draw->vsDirtyConstI) 407 { 408 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI); 409 draw->vsDirtyConstI = 0; 410 } 411 412 if(draw->vsDirtyConstB) 413 { 414 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB); 415 draw->vsDirtyConstB = 0; 416 } 417 418 if(context->vertexShader->isInstanceIdDeclared()) 419 { 420 data->instanceID = context->instanceID; 421 } 422 423 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers); 424 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers); 425 426 if(pixelState.stencilActive) 427 { 428 data->stencil[0] = stencil; 429 data->stencil[1] = stencilCCW; 430 } 431 432 if(setupState.isDrawPoint) 433 { 434 data->pointSizeMin = pointSizeMin; 435 data->pointSizeMax = pointSizeMax; 436 } 437 438 data->lineWidth = context->lineWidth; 439 440 data->factor = factor; 441 442 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 443 { 444 float ref = context->alphaReference * (1.0f / 255.0f); 445 float margin = sw::min(ref, 1.0f - ref); 446 447 if(ms == 4) 448 { 449 data->a2c0 = replicate(ref - margin * 0.6f); 450 data->a2c1 = replicate(ref - margin * 0.2f); 451 data->a2c2 = replicate(ref + margin * 0.2f); 452 data->a2c3 = replicate(ref + margin * 0.6f); 453 } 454 else if(ms == 2) 455 { 456 data->a2c0 = replicate(ref - margin * 0.3f); 457 data->a2c1 = replicate(ref + margin * 0.3f); 458 } 459 else ASSERT(false); 460 } 461 462 if(pixelState.occlusionEnabled) 463 { 464 for(int cluster = 0; cluster < clusterCount; cluster++) 465 { 466 data->occlusion[cluster] = 0; 467 } 468 } 469 470 #if PERF_PROFILE 471 for(int cluster = 0; cluster < clusterCount; cluster++) 472 { 473 for(int i = 0; i < PERF_TIMERS; i++) 474 { 475 data->cycles[i][cluster] = 0; 476 } 477 } 478 #endif 479 480 // Viewport 481 { 482 float W = 0.5f * viewport.width; 483 float H = 0.5f * viewport.height; 484 float X0 = viewport.x + W; 485 float Y0 = viewport.y + H; 486 float N = viewport.minDepth; 487 float F = viewport.maxDepth; 488 float Z = F - N; 489 490 if(context->isDrawTriangle()) 491 { 492 N += context->depthBias; 493 } 494 495 if(complementaryDepthBuffer) 496 { 497 Z = -Z; 498 N = 1 - N; 499 } 500 501 data->Wx16 = replicate(W * 16); 502 data->Hx16 = replicate(H * 16); 503 data->X0x16 = replicate(X0 * 16 - 8); 504 data->Y0x16 = replicate(Y0 * 16 - 8); 505 data->halfPixelX = replicate(0.5f / W); 506 data->halfPixelY = replicate(0.5f / H); 507 data->viewportHeight = abs(viewport.height); 508 data->slopeDepthBias = context->slopeDepthBias; 509 data->depthRange = Z; 510 data->depthNear = N; 511 draw->clipFlags = clipFlags; 512 513 if(clipFlags) 514 { 515 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0]; 516 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1]; 517 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2]; 518 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3]; 519 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4]; 520 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5]; 521 } 522 } 523 524 // Target 525 { 526 for(int index = 0; index < RENDERTARGETS; index++) 527 { 528 draw->renderTarget[index] = context->renderTarget[index]; 529 530 if(draw->renderTarget[index]) 531 { 532 unsigned int layer = context->renderTargetLayer[index]; 533 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); 534 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB(); 535 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB(); 536 } 537 } 538 539 draw->depthBuffer = context->depthBuffer; 540 draw->stencilBuffer = context->stencilBuffer; 541 542 if(draw->depthBuffer) 543 { 544 unsigned int layer = context->depthBufferLayer; 545 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); 546 data->depthPitchB = context->depthBuffer->getInternalPitchB(); 547 data->depthSliceB = context->depthBuffer->getInternalSliceB(); 548 } 549 550 if(draw->stencilBuffer) 551 { 552 unsigned int layer = context->stencilBufferLayer; 553 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED); 554 data->stencilPitchB = context->stencilBuffer->getStencilPitchB(); 555 data->stencilSliceB = context->stencilBuffer->getStencilSliceB(); 556 } 557 } 558 559 // Scissor 560 { 561 data->scissorX0 = scissor.x0; 562 data->scissorX1 = scissor.x1; 563 data->scissorY0 = scissor.y0; 564 data->scissorY1 = scissor.y1; 565 } 566 567 draw->primitive = 0; 568 draw->count = count; 569 570 draw->references = (count + batch - 1) / batch; 571 572 schedulerMutex.lock(); 573 ++nextDraw; // Atomic 574 schedulerMutex.unlock(); 575 576 #ifndef NDEBUG 577 if(threadCount == 1) // Use main thread for draw execution 578 { 579 threadsAwake = 1; 580 task[0].type = Task::RESUME; 581 582 taskLoop(0); 583 } 584 else 585 #endif 586 { 587 if(!threadsAwake) 588 { 589 suspend[0]->wait(); 590 591 threadsAwake = 1; 592 task[0].type = Task::RESUME; 593 594 resume[0]->signal(); 595 } 596 } 597 } 598 clear(void * value,VkFormat format,Surface * dest,const Rect & clearRect,unsigned int rgbaMask)599 void Renderer::clear(void *value, VkFormat format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask) 600 { 601 blitter->clear(value, format, dest, clearRect, rgbaMask); 602 } 603 blit(Surface * source,const SliceRectF & sRect,Surface * dest,const SliceRect & dRect,bool filter,bool isStencil,bool sRGBconversion)604 void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion) 605 { 606 blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion}); 607 } 608 blit3D(Surface * source,Surface * dest)609 void Renderer::blit3D(Surface *source, Surface *dest) 610 { 611 blitter->blit3D(source, dest); 612 } 613 threadFunction(void * parameters)614 void Renderer::threadFunction(void *parameters) 615 { 616 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer; 617 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex; 618 619 if(logPrecision < IEEE) 620 { 621 CPUID::setFlushToZero(true); 622 CPUID::setDenormalsAreZero(true); 623 } 624 625 renderer->threadLoop(threadIndex); 626 } 627 threadLoop(int threadIndex)628 void Renderer::threadLoop(int threadIndex) 629 { 630 while(!exitThreads) 631 { 632 taskLoop(threadIndex); 633 634 suspend[threadIndex]->signal(); 635 resume[threadIndex]->wait(); 636 } 637 } 638 taskLoop(int threadIndex)639 void Renderer::taskLoop(int threadIndex) 640 { 641 while(task[threadIndex].type != Task::SUSPEND) 642 { 643 scheduleTask(threadIndex); 644 executeTask(threadIndex); 645 } 646 } 647 findAvailableTasks()648 void Renderer::findAvailableTasks() 649 { 650 // Find pixel tasks 651 for(int cluster = 0; cluster < clusterCount; cluster++) 652 { 653 if(!pixelProgress[cluster].executing) 654 { 655 for(int unit = 0; unit < unitCount; unit++) 656 { 657 if(primitiveProgress[unit].references > 0) // Contains processed primitives 658 { 659 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall) 660 { 661 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered 662 { 663 Task &task = taskQueue[qHead]; 664 task.type = Task::PIXELS; 665 task.primitiveUnit = unit; 666 task.pixelCluster = cluster; 667 668 pixelProgress[cluster].executing = true; 669 670 // Commit to the task queue 671 qHead = (qHead + 1) & TASK_COUNT_BITS; 672 qSize++; 673 674 break; 675 } 676 } 677 } 678 } 679 } 680 } 681 682 // Find primitive tasks 683 if(currentDraw == nextDraw) 684 { 685 return; // No more primitives to process 686 } 687 688 for(int unit = 0; unit < unitCount; unit++) 689 { 690 DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS]; 691 692 int primitive = draw->primitive; 693 int count = draw->count; 694 695 if(primitive >= count) 696 { 697 ++currentDraw; // Atomic 698 699 if(currentDraw == nextDraw) 700 { 701 return; // No more primitives to process 702 } 703 704 draw = drawList[currentDraw & DRAW_COUNT_BITS]; 705 } 706 707 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit 708 { 709 primitive = draw->primitive; 710 count = draw->count; 711 int batch = draw->batchSize; 712 713 primitiveProgress[unit].drawCall = currentDraw; 714 primitiveProgress[unit].firstPrimitive = primitive; 715 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive; 716 717 draw->primitive += batch; 718 719 Task &task = taskQueue[qHead]; 720 task.type = Task::PRIMITIVES; 721 task.primitiveUnit = unit; 722 723 primitiveProgress[unit].references = -1; 724 725 // Commit to the task queue 726 qHead = (qHead + 1) & TASK_COUNT_BITS; 727 qSize++; 728 } 729 } 730 } 731 scheduleTask(int threadIndex)732 void Renderer::scheduleTask(int threadIndex) 733 { 734 schedulerMutex.lock(); 735 736 int curThreadsAwake = threadsAwake; 737 738 if((int)qSize < threadCount - curThreadsAwake + 1) 739 { 740 findAvailableTasks(); 741 } 742 743 if(qSize != 0) 744 { 745 task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS]; 746 qSize--; 747 748 if(curThreadsAwake != threadCount) 749 { 750 int wakeup = qSize - curThreadsAwake + 1; 751 752 for(int i = 0; i < threadCount && wakeup > 0; i++) 753 { 754 if(task[i].type == Task::SUSPEND) 755 { 756 suspend[i]->wait(); 757 task[i].type = Task::RESUME; 758 resume[i]->signal(); 759 760 ++threadsAwake; // Atomic 761 wakeup--; 762 } 763 } 764 } 765 } 766 else 767 { 768 task[threadIndex].type = Task::SUSPEND; 769 770 --threadsAwake; // Atomic 771 } 772 773 schedulerMutex.unlock(); 774 } 775 executeTask(int threadIndex)776 void Renderer::executeTask(int threadIndex) 777 { 778 #if PERF_HUD 779 int64_t startTick = Timer::ticks(); 780 #endif 781 782 switch(task[threadIndex].type) 783 { 784 case Task::PRIMITIVES: 785 { 786 int unit = task[threadIndex].primitiveUnit; 787 788 int input = primitiveProgress[unit].firstPrimitive; 789 int count = primitiveProgress[unit].primitiveCount; 790 DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 791 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives; 792 793 processPrimitiveVertices(unit, input, count, draw->count, threadIndex); 794 795 #if PERF_HUD 796 int64_t time = Timer::ticks(); 797 vertexTime[threadIndex] += time - startTick; 798 startTick = time; 799 #endif 800 801 int visible = 0; 802 803 if(!draw->setupState.rasterizerDiscard) 804 { 805 visible = (this->*setupPrimitives)(unit, count); 806 } 807 808 primitiveProgress[unit].visible = visible; 809 primitiveProgress[unit].references = clusterCount; 810 811 #if PERF_HUD 812 setupTime[threadIndex] += Timer::ticks() - startTick; 813 #endif 814 } 815 break; 816 case Task::PIXELS: 817 { 818 int unit = task[threadIndex].primitiveUnit; 819 int visible = primitiveProgress[unit].visible; 820 821 if(visible > 0) 822 { 823 int cluster = task[threadIndex].pixelCluster; 824 Primitive *primitive = primitiveBatch[unit]; 825 DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS]; 826 DrawData *data = draw->data; 827 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer; 828 829 pixelRoutine(primitive, visible, cluster, data); 830 } 831 832 finishRendering(task[threadIndex]); 833 834 #if PERF_HUD 835 pixelTime[threadIndex] += Timer::ticks() - startTick; 836 #endif 837 } 838 break; 839 case Task::RESUME: 840 break; 841 case Task::SUSPEND: 842 break; 843 default: 844 ASSERT(false); 845 } 846 } 847 synchronize()848 void Renderer::synchronize() 849 { 850 sync->lock(sw::PUBLIC); 851 sync->unlock(); 852 } 853 finishRendering(Task & pixelTask)854 void Renderer::finishRendering(Task &pixelTask) 855 { 856 int unit = pixelTask.primitiveUnit; 857 int cluster = pixelTask.pixelCluster; 858 859 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 860 DrawData &data = *draw.data; 861 int primitive = primitiveProgress[unit].firstPrimitive; 862 int count = primitiveProgress[unit].primitiveCount; 863 int processedPrimitives = primitive + count; 864 865 pixelProgress[cluster].processedPrimitives = processedPrimitives; 866 867 if(pixelProgress[cluster].processedPrimitives >= draw.count) 868 { 869 ++pixelProgress[cluster].drawCall; // Atomic 870 pixelProgress[cluster].processedPrimitives = 0; 871 } 872 873 int ref = primitiveProgress[unit].references--; // Atomic 874 875 if(ref == 0) 876 { 877 ref = draw.references--; // Atomic 878 879 if(ref == 0) 880 { 881 #if PERF_PROFILE 882 for(int cluster = 0; cluster < clusterCount; cluster++) 883 { 884 for(int i = 0; i < PERF_TIMERS; i++) 885 { 886 profiler.cycles[i] += data.cycles[i][cluster]; 887 } 888 } 889 #endif 890 891 if(draw.queries) 892 { 893 for(auto &query : *(draw.queries)) 894 { 895 switch(query->type) 896 { 897 case Query::FRAGMENTS_PASSED: 898 for(int cluster = 0; cluster < clusterCount; cluster++) 899 { 900 query->data += data.occlusion[cluster]; 901 } 902 break; 903 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: 904 query->data += processedPrimitives; 905 break; 906 default: 907 break; 908 } 909 910 --query->reference; // Atomic 911 } 912 913 delete draw.queries; 914 draw.queries = 0; 915 } 916 917 for(int i = 0; i < RENDERTARGETS; i++) 918 { 919 if(draw.renderTarget[i]) 920 { 921 draw.renderTarget[i]->unlockInternal(); 922 } 923 } 924 925 if(draw.depthBuffer) 926 { 927 draw.depthBuffer->unlockInternal(); 928 } 929 930 if(draw.stencilBuffer) 931 { 932 draw.stencilBuffer->unlockStencil(); 933 } 934 935 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++) 936 { 937 if(draw.texture[i]) 938 { 939 draw.texture[i]->unlock(); 940 } 941 } 942 943 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 944 { 945 if(draw.vertexStream[i]) 946 { 947 draw.vertexStream[i]->unlock(); 948 } 949 } 950 951 if(draw.indexBuffer) 952 { 953 draw.indexBuffer->unlock(); 954 } 955 956 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 957 { 958 if(draw.pUniformBuffers[i]) 959 { 960 draw.pUniformBuffers[i]->unlock(); 961 } 962 if(draw.vUniformBuffers[i]) 963 { 964 draw.vUniformBuffers[i]->unlock(); 965 } 966 } 967 968 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 969 { 970 if(draw.transformFeedbackBuffers[i]) 971 { 972 draw.transformFeedbackBuffers[i]->unlock(); 973 } 974 } 975 976 draw.vertexRoutine->unbind(); 977 draw.setupRoutine->unbind(); 978 draw.pixelRoutine->unbind(); 979 980 sync->unlock(); 981 982 draw.references = -1; 983 resumeApp->signal(); 984 } 985 } 986 987 pixelProgress[cluster].executing = false; 988 } 989 processPrimitiveVertices(int unit,unsigned int start,unsigned int triangleCount,unsigned int loop,int thread)990 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread) 991 { 992 Triangle *triangle = triangleBatch[unit]; 993 int primitiveDrawCall = primitiveProgress[unit].drawCall; 994 DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS]; 995 DrawData *data = draw->data; 996 VertexTask *task = vertexTask[thread]; 997 998 const void *indices = data->indices; 999 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer; 1000 1001 if(task->vertexCache.drawCall != primitiveDrawCall) 1002 { 1003 task->vertexCache.clear(); 1004 task->vertexCache.drawCall = primitiveDrawCall; 1005 } 1006 1007 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size 1008 1009 switch(draw->drawType) 1010 { 1011 case DRAW_POINTLIST: 1012 { 1013 unsigned int index = start; 1014 1015 for(unsigned int i = 0; i < triangleCount; i++) 1016 { 1017 batch[i][0] = index; 1018 batch[i][1] = index; 1019 batch[i][2] = index; 1020 1021 index += 1; 1022 } 1023 } 1024 break; 1025 case DRAW_LINELIST: 1026 { 1027 unsigned int index = 2 * start; 1028 1029 for(unsigned int i = 0; i < triangleCount; i++) 1030 { 1031 batch[i][0] = index + 0; 1032 batch[i][1] = index + 1; 1033 batch[i][2] = index + 1; 1034 1035 index += 2; 1036 } 1037 } 1038 break; 1039 case DRAW_LINESTRIP: 1040 { 1041 unsigned int index = start; 1042 1043 for(unsigned int i = 0; i < triangleCount; i++) 1044 { 1045 batch[i][0] = index + 0; 1046 batch[i][1] = index + 1; 1047 batch[i][2] = index + 1; 1048 1049 index += 1; 1050 } 1051 } 1052 break; 1053 case DRAW_TRIANGLELIST: 1054 { 1055 unsigned int index = 3 * start; 1056 1057 for(unsigned int i = 0; i < triangleCount; i++) 1058 { 1059 batch[i][0] = index + 0; 1060 batch[i][1] = index + 1; 1061 batch[i][2] = index + 2; 1062 1063 index += 3; 1064 } 1065 } 1066 break; 1067 case DRAW_TRIANGLESTRIP: 1068 { 1069 unsigned int index = start; 1070 1071 for(unsigned int i = 0; i < triangleCount; i++) 1072 { 1073 if(leadingVertexFirst) 1074 { 1075 batch[i][0] = index + 0; 1076 batch[i][1] = index + (index & 1) + 1; 1077 batch[i][2] = index + (~index & 1) + 1; 1078 } 1079 else 1080 { 1081 batch[i][0] = index + (index & 1); 1082 batch[i][1] = index + (~index & 1); 1083 batch[i][2] = index + 2; 1084 } 1085 1086 index += 1; 1087 } 1088 } 1089 break; 1090 case DRAW_TRIANGLEFAN: 1091 { 1092 unsigned int index = start; 1093 1094 for(unsigned int i = 0; i < triangleCount; i++) 1095 { 1096 if(leadingVertexFirst) 1097 { 1098 batch[i][0] = index + 1; 1099 batch[i][1] = index + 2; 1100 batch[i][2] = 0; 1101 } 1102 else 1103 { 1104 batch[i][0] = 0; 1105 batch[i][1] = index + 1; 1106 batch[i][2] = index + 2; 1107 } 1108 1109 index += 1; 1110 } 1111 } 1112 break; 1113 case DRAW_INDEXEDPOINTLIST16: 1114 { 1115 const unsigned short *index = (const unsigned short*)indices + start; 1116 1117 for(unsigned int i = 0; i < triangleCount; i++) 1118 { 1119 batch[i][0] = *index; 1120 batch[i][1] = *index; 1121 batch[i][2] = *index; 1122 1123 index += 1; 1124 } 1125 } 1126 break; 1127 case DRAW_INDEXEDPOINTLIST32: 1128 { 1129 const unsigned int *index = (const unsigned int*)indices + start; 1130 1131 for(unsigned int i = 0; i < triangleCount; i++) 1132 { 1133 batch[i][0] = *index; 1134 batch[i][1] = *index; 1135 batch[i][2] = *index; 1136 1137 index += 1; 1138 } 1139 } 1140 break; 1141 case DRAW_INDEXEDLINELIST16: 1142 { 1143 const unsigned short *index = (const unsigned short*)indices + 2 * start; 1144 1145 for(unsigned int i = 0; i < triangleCount; i++) 1146 { 1147 batch[i][0] = index[0]; 1148 batch[i][1] = index[1]; 1149 batch[i][2] = index[1]; 1150 1151 index += 2; 1152 } 1153 } 1154 break; 1155 case DRAW_INDEXEDLINELIST32: 1156 { 1157 const unsigned int *index = (const unsigned int*)indices + 2 * start; 1158 1159 for(unsigned int i = 0; i < triangleCount; i++) 1160 { 1161 batch[i][0] = index[0]; 1162 batch[i][1] = index[1]; 1163 batch[i][2] = index[1]; 1164 1165 index += 2; 1166 } 1167 } 1168 break; 1169 case DRAW_INDEXEDLINESTRIP16: 1170 { 1171 const unsigned short *index = (const unsigned short*)indices + start; 1172 1173 for(unsigned int i = 0; i < triangleCount; i++) 1174 { 1175 batch[i][0] = index[0]; 1176 batch[i][1] = index[1]; 1177 batch[i][2] = index[1]; 1178 1179 index += 1; 1180 } 1181 } 1182 break; 1183 case DRAW_INDEXEDLINESTRIP32: 1184 { 1185 const unsigned int *index = (const unsigned int*)indices + start; 1186 1187 for(unsigned int i = 0; i < triangleCount; i++) 1188 { 1189 batch[i][0] = index[0]; 1190 batch[i][1] = index[1]; 1191 batch[i][2] = index[1]; 1192 1193 index += 1; 1194 } 1195 } 1196 break; 1197 case DRAW_INDEXEDTRIANGLELIST16: 1198 { 1199 const unsigned short *index = (const unsigned short*)indices + 3 * start; 1200 1201 for(unsigned int i = 0; i < triangleCount; i++) 1202 { 1203 batch[i][0] = index[0]; 1204 batch[i][1] = index[1]; 1205 batch[i][2] = index[2]; 1206 1207 index += 3; 1208 } 1209 } 1210 break; 1211 case DRAW_INDEXEDTRIANGLELIST32: 1212 { 1213 const unsigned int *index = (const unsigned int*)indices + 3 * start; 1214 1215 for(unsigned int i = 0; i < triangleCount; i++) 1216 { 1217 batch[i][0] = index[0]; 1218 batch[i][1] = index[1]; 1219 batch[i][2] = index[2]; 1220 1221 index += 3; 1222 } 1223 } 1224 break; 1225 case DRAW_INDEXEDTRIANGLESTRIP16: 1226 { 1227 const unsigned short *index = (const unsigned short*)indices + start; 1228 1229 for(unsigned int i = 0; i < triangleCount; i++) 1230 { 1231 batch[i][0] = index[0]; 1232 batch[i][1] = index[((start + i) & 1) + 1]; 1233 batch[i][2] = index[(~(start + i) & 1) + 1]; 1234 1235 index += 1; 1236 } 1237 } 1238 break; 1239 case DRAW_INDEXEDTRIANGLESTRIP32: 1240 { 1241 const unsigned int *index = (const unsigned int*)indices + start; 1242 1243 for(unsigned int i = 0; i < triangleCount; i++) 1244 { 1245 batch[i][0] = index[0]; 1246 batch[i][1] = index[((start + i) & 1) + 1]; 1247 batch[i][2] = index[(~(start + i) & 1) + 1]; 1248 1249 index += 1; 1250 } 1251 } 1252 break; 1253 case DRAW_INDEXEDTRIANGLEFAN16: 1254 { 1255 const unsigned short *index = (const unsigned short*)indices; 1256 1257 for(unsigned int i = 0; i < triangleCount; i++) 1258 { 1259 batch[i][0] = index[start + i + 1]; 1260 batch[i][1] = index[start + i + 2]; 1261 batch[i][2] = index[0]; 1262 } 1263 } 1264 break; 1265 case DRAW_INDEXEDTRIANGLEFAN32: 1266 { 1267 const unsigned int *index = (const unsigned int*)indices; 1268 1269 for(unsigned int i = 0; i < triangleCount; i++) 1270 { 1271 batch[i][0] = index[start + i + 1]; 1272 batch[i][1] = index[start + i + 2]; 1273 batch[i][2] = index[0]; 1274 } 1275 } 1276 break; 1277 default: 1278 ASSERT(false); 1279 return; 1280 } 1281 1282 task->primitiveStart = start; 1283 task->vertexCount = triangleCount * 3; 1284 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data); 1285 } 1286 setupTriangles(int unit,int count)1287 int Renderer::setupTriangles(int unit, int count) 1288 { 1289 Triangle *triangle = triangleBatch[unit]; 1290 Primitive *primitive = primitiveBatch[unit]; 1291 1292 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1293 SetupProcessor::State &state = draw.setupState; 1294 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1295 1296 int ms = state.multiSample; 1297 int pos = state.positionRegister; 1298 const DrawData *data = draw.data; 1299 int visible = 0; 1300 1301 for(int i = 0; i < count; i++, triangle++) 1302 { 1303 Vertex &v0 = triangle->v0; 1304 Vertex &v1 = triangle->v1; 1305 Vertex &v2 = triangle->v2; 1306 1307 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) 1308 { 1309 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]); 1310 1311 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags; 1312 1313 if(clipFlagsOr != Clipper::CLIP_FINITE) 1314 { 1315 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1316 { 1317 continue; 1318 } 1319 } 1320 1321 if(setupRoutine(primitive, triangle, &polygon, data)) 1322 { 1323 primitive += ms; 1324 visible++; 1325 } 1326 } 1327 } 1328 1329 return visible; 1330 } 1331 setupLines(int unit,int count)1332 int Renderer::setupLines(int unit, int count) 1333 { 1334 Triangle *triangle = triangleBatch[unit]; 1335 Primitive *primitive = primitiveBatch[unit]; 1336 int visible = 0; 1337 1338 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1339 SetupProcessor::State &state = draw.setupState; 1340 1341 int ms = state.multiSample; 1342 1343 for(int i = 0; i < count; i++) 1344 { 1345 if(setupLine(*primitive, *triangle, draw)) 1346 { 1347 primitive += ms; 1348 visible++; 1349 } 1350 1351 triangle++; 1352 } 1353 1354 return visible; 1355 } 1356 setupPoints(int unit,int count)1357 int Renderer::setupPoints(int unit, int count) 1358 { 1359 Triangle *triangle = triangleBatch[unit]; 1360 Primitive *primitive = primitiveBatch[unit]; 1361 int visible = 0; 1362 1363 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1364 SetupProcessor::State &state = draw.setupState; 1365 1366 int ms = state.multiSample; 1367 1368 for(int i = 0; i < count; i++) 1369 { 1370 if(setupPoint(*primitive, *triangle, draw)) 1371 { 1372 primitive += ms; 1373 visible++; 1374 } 1375 1376 triangle++; 1377 } 1378 1379 return visible; 1380 } 1381 setupLine(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1382 bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 1383 { 1384 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1385 const SetupProcessor::State &state = draw.setupState; 1386 const DrawData &data = *draw.data; 1387 1388 float lineWidth = data.lineWidth; 1389 1390 Vertex &v0 = triangle.v0; 1391 Vertex &v1 = triangle.v1; 1392 1393 int pos = state.positionRegister; 1394 1395 const float4 &P0 = v0.v[pos]; 1396 const float4 &P1 = v1.v[pos]; 1397 1398 if(P0.w <= 0 && P1.w <= 0) 1399 { 1400 return false; 1401 } 1402 1403 const float W = data.Wx16[0] * (1.0f / 16.0f); 1404 const float H = data.Hx16[0] * (1.0f / 16.0f); 1405 1406 float dx = W * (P1.x / P1.w - P0.x / P0.w); 1407 float dy = H * (P1.y / P1.w - P0.y / P0.w); 1408 1409 if(dx == 0 && dy == 0) 1410 { 1411 return false; 1412 } 1413 1414 if(state.multiSample > 1) // Rectangle 1415 { 1416 float4 P[4]; 1417 int C[4]; 1418 1419 P[0] = P0; 1420 P[1] = P1; 1421 P[2] = P1; 1422 P[3] = P0; 1423 1424 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy); 1425 1426 dx *= scale; 1427 dy *= scale; 1428 1429 float dx0h = dx * P0.w / H; 1430 float dy0w = dy * P0.w / W; 1431 1432 float dx1h = dx * P1.w / H; 1433 float dy1w = dy * P1.w / W; 1434 1435 P[0].x += -dy0w; 1436 P[0].y += +dx0h; 1437 C[0] = clipper->computeClipFlags(P[0]); 1438 1439 P[1].x += -dy1w; 1440 P[1].y += +dx1h; 1441 C[1] = clipper->computeClipFlags(P[1]); 1442 1443 P[2].x += +dy1w; 1444 P[2].y += -dx1h; 1445 C[2] = clipper->computeClipFlags(P[2]); 1446 1447 P[3].x += +dy0w; 1448 P[3].y += -dx0h; 1449 C[3] = clipper->computeClipFlags(P[3]); 1450 1451 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 1452 { 1453 Polygon polygon(P, 4); 1454 1455 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 1456 1457 if(clipFlagsOr != Clipper::CLIP_FINITE) 1458 { 1459 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1460 { 1461 return false; 1462 } 1463 } 1464 1465 return setupRoutine(&primitive, &triangle, &polygon, &data); 1466 } 1467 } 1468 else // Diamond test convention 1469 { 1470 float4 P[8]; 1471 int C[8]; 1472 1473 P[0] = P0; 1474 P[1] = P0; 1475 P[2] = P0; 1476 P[3] = P0; 1477 P[4] = P1; 1478 P[5] = P1; 1479 P[6] = P1; 1480 P[7] = P1; 1481 1482 float dx0 = lineWidth * 0.5f * P0.w / W; 1483 float dy0 = lineWidth * 0.5f * P0.w / H; 1484 1485 float dx1 = lineWidth * 0.5f * P1.w / W; 1486 float dy1 = lineWidth * 0.5f * P1.w / H; 1487 1488 P[0].x += -dx0; 1489 C[0] = clipper->computeClipFlags(P[0]); 1490 1491 P[1].y += +dy0; 1492 C[1] = clipper->computeClipFlags(P[1]); 1493 1494 P[2].x += +dx0; 1495 C[2] = clipper->computeClipFlags(P[2]); 1496 1497 P[3].y += -dy0; 1498 C[3] = clipper->computeClipFlags(P[3]); 1499 1500 P[4].x += -dx1; 1501 C[4] = clipper->computeClipFlags(P[4]); 1502 1503 P[5].y += +dy1; 1504 C[5] = clipper->computeClipFlags(P[5]); 1505 1506 P[6].x += +dx1; 1507 C[6] = clipper->computeClipFlags(P[6]); 1508 1509 P[7].y += -dy1; 1510 C[7] = clipper->computeClipFlags(P[7]); 1511 1512 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE) 1513 { 1514 float4 L[6]; 1515 1516 if(dx > -dy) 1517 { 1518 if(dx > dy) // Right 1519 { 1520 L[0] = P[0]; 1521 L[1] = P[1]; 1522 L[2] = P[5]; 1523 L[3] = P[6]; 1524 L[4] = P[7]; 1525 L[5] = P[3]; 1526 } 1527 else // Down 1528 { 1529 L[0] = P[0]; 1530 L[1] = P[4]; 1531 L[2] = P[5]; 1532 L[3] = P[6]; 1533 L[4] = P[2]; 1534 L[5] = P[3]; 1535 } 1536 } 1537 else 1538 { 1539 if(dx > dy) // Up 1540 { 1541 L[0] = P[0]; 1542 L[1] = P[1]; 1543 L[2] = P[2]; 1544 L[3] = P[6]; 1545 L[4] = P[7]; 1546 L[5] = P[4]; 1547 } 1548 else // Left 1549 { 1550 L[0] = P[1]; 1551 L[1] = P[2]; 1552 L[2] = P[3]; 1553 L[3] = P[7]; 1554 L[4] = P[4]; 1555 L[5] = P[5]; 1556 } 1557 } 1558 1559 Polygon polygon(L, 6); 1560 1561 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags; 1562 1563 if(clipFlagsOr != Clipper::CLIP_FINITE) 1564 { 1565 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1566 { 1567 return false; 1568 } 1569 } 1570 1571 return setupRoutine(&primitive, &triangle, &polygon, &data); 1572 } 1573 } 1574 1575 return false; 1576 } 1577 setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1578 bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 1579 { 1580 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1581 const SetupProcessor::State &state = draw.setupState; 1582 const DrawData &data = *draw.data; 1583 1584 Vertex &v = triangle.v0; 1585 1586 float pSize; 1587 1588 int pts = state.pointSizeRegister; 1589 1590 if(state.pointSizeRegister != Unused) 1591 { 1592 pSize = v.v[pts].y; 1593 } 1594 else 1595 { 1596 pSize = 1.0f; 1597 } 1598 1599 pSize = clamp(pSize, data.pointSizeMin, data.pointSizeMax); 1600 1601 float4 P[4]; 1602 int C[4]; 1603 1604 int pos = state.positionRegister; 1605 1606 P[0] = v.v[pos]; 1607 P[1] = v.v[pos]; 1608 P[2] = v.v[pos]; 1609 P[3] = v.v[pos]; 1610 1611 const float X = pSize * P[0].w * data.halfPixelX[0]; 1612 const float Y = pSize * P[0].w * data.halfPixelY[0]; 1613 1614 P[0].x -= X; 1615 P[0].y += Y; 1616 C[0] = clipper->computeClipFlags(P[0]); 1617 1618 P[1].x += X; 1619 P[1].y += Y; 1620 C[1] = clipper->computeClipFlags(P[1]); 1621 1622 P[2].x += X; 1623 P[2].y -= Y; 1624 C[2] = clipper->computeClipFlags(P[2]); 1625 1626 P[3].x -= X; 1627 P[3].y -= Y; 1628 C[3] = clipper->computeClipFlags(P[3]); 1629 1630 triangle.v1 = triangle.v0; 1631 triangle.v2 = triangle.v0; 1632 1633 triangle.v1.X += iround(16 * 0.5f * pSize); 1634 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner 1635 1636 Polygon polygon(P, 4); 1637 1638 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 1639 { 1640 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 1641 1642 if(clipFlagsOr != Clipper::CLIP_FINITE) 1643 { 1644 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1645 { 1646 return false; 1647 } 1648 } 1649 1650 return setupRoutine(&primitive, &triangle, &polygon, &data); 1651 } 1652 1653 return false; 1654 } 1655 initializeThreads()1656 void Renderer::initializeThreads() 1657 { 1658 unitCount = ceilPow2(threadCount); 1659 clusterCount = ceilPow2(threadCount); 1660 1661 for(int i = 0; i < unitCount; i++) 1662 { 1663 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle)); 1664 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive)); 1665 } 1666 1667 for(int i = 0; i < threadCount; i++) 1668 { 1669 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask)); 1670 vertexTask[i]->vertexCache.drawCall = -1; 1671 1672 task[i].type = Task::SUSPEND; 1673 1674 resume[i] = new Event(); 1675 suspend[i] = new Event(); 1676 1677 Parameters parameters; 1678 parameters.threadIndex = i; 1679 parameters.renderer = this; 1680 1681 exitThreads = false; 1682 worker[i] = new Thread(threadFunction, ¶meters); 1683 1684 suspend[i]->wait(); 1685 suspend[i]->signal(); 1686 } 1687 } 1688 terminateThreads()1689 void Renderer::terminateThreads() 1690 { 1691 while(threadsAwake != 0) 1692 { 1693 Thread::sleep(1); 1694 } 1695 1696 for(int thread = 0; thread < threadCount; thread++) 1697 { 1698 if(worker[thread]) 1699 { 1700 exitThreads = true; 1701 resume[thread]->signal(); 1702 worker[thread]->join(); 1703 1704 delete worker[thread]; 1705 worker[thread] = 0; 1706 delete resume[thread]; 1707 resume[thread] = 0; 1708 delete suspend[thread]; 1709 suspend[thread] = 0; 1710 } 1711 1712 deallocate(vertexTask[thread]); 1713 vertexTask[thread] = 0; 1714 } 1715 1716 for(int i = 0; i < 16; i++) 1717 { 1718 deallocate(triangleBatch[i]); 1719 triangleBatch[i] = 0; 1720 1721 deallocate(primitiveBatch[i]); 1722 primitiveBatch[i] = 0; 1723 } 1724 } 1725 loadConstants(const VertexShader * vertexShader)1726 void Renderer::loadConstants(const VertexShader *vertexShader) 1727 { 1728 size_t count = vertexShader->getLength(); 1729 1730 for(size_t i = 0; i < count; i++) 1731 { 1732 const Shader::Instruction *instruction = vertexShader->getInstruction(i); 1733 1734 if(instruction->opcode == Shader::OPCODE_DEF) 1735 { 1736 int index = instruction->dst.index; 1737 float value[4]; 1738 1739 value[0] = instruction->src[0].value[0]; 1740 value[1] = instruction->src[0].value[1]; 1741 value[2] = instruction->src[0].value[2]; 1742 value[3] = instruction->src[0].value[3]; 1743 1744 setVertexShaderConstantF(index, value); 1745 } 1746 else if(instruction->opcode == Shader::OPCODE_DEFI) 1747 { 1748 int index = instruction->dst.index; 1749 int integer[4]; 1750 1751 integer[0] = instruction->src[0].integer[0]; 1752 integer[1] = instruction->src[0].integer[1]; 1753 integer[2] = instruction->src[0].integer[2]; 1754 integer[3] = instruction->src[0].integer[3]; 1755 1756 setVertexShaderConstantI(index, integer); 1757 } 1758 else if(instruction->opcode == Shader::OPCODE_DEFB) 1759 { 1760 int index = instruction->dst.index; 1761 int boolean = instruction->src[0].boolean[0]; 1762 1763 setVertexShaderConstantB(index, &boolean); 1764 } 1765 } 1766 } 1767 loadConstants(const PixelShader * pixelShader)1768 void Renderer::loadConstants(const PixelShader *pixelShader) 1769 { 1770 if(!pixelShader) return; 1771 1772 size_t count = pixelShader->getLength(); 1773 1774 for(size_t i = 0; i < count; i++) 1775 { 1776 const Shader::Instruction *instruction = pixelShader->getInstruction(i); 1777 1778 if(instruction->opcode == Shader::OPCODE_DEF) 1779 { 1780 int index = instruction->dst.index; 1781 float value[4]; 1782 1783 value[0] = instruction->src[0].value[0]; 1784 value[1] = instruction->src[0].value[1]; 1785 value[2] = instruction->src[0].value[2]; 1786 value[3] = instruction->src[0].value[3]; 1787 1788 setPixelShaderConstantF(index, value); 1789 } 1790 else if(instruction->opcode == Shader::OPCODE_DEFI) 1791 { 1792 int index = instruction->dst.index; 1793 int integer[4]; 1794 1795 integer[0] = instruction->src[0].integer[0]; 1796 integer[1] = instruction->src[0].integer[1]; 1797 integer[2] = instruction->src[0].integer[2]; 1798 integer[3] = instruction->src[0].integer[3]; 1799 1800 setPixelShaderConstantI(index, integer); 1801 } 1802 else if(instruction->opcode == Shader::OPCODE_DEFB) 1803 { 1804 int index = instruction->dst.index; 1805 int boolean = instruction->src[0].boolean[0]; 1806 1807 setPixelShaderConstantB(index, &boolean); 1808 } 1809 } 1810 } 1811 setIndexBuffer(Resource * indexBuffer)1812 void Renderer::setIndexBuffer(Resource *indexBuffer) 1813 { 1814 context->indexBuffer = indexBuffer; 1815 } 1816 setMultiSampleMask(unsigned int mask)1817 void Renderer::setMultiSampleMask(unsigned int mask) 1818 { 1819 context->sampleMask = mask; 1820 } 1821 setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)1822 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing) 1823 { 1824 sw::transparencyAntialiasing = transparencyAntialiasing; 1825 } 1826 isReadWriteTexture(int sampler)1827 bool Renderer::isReadWriteTexture(int sampler) 1828 { 1829 for(int index = 0; index < RENDERTARGETS; index++) 1830 { 1831 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource()) 1832 { 1833 return true; 1834 } 1835 } 1836 1837 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource()) 1838 { 1839 return true; 1840 } 1841 1842 return false; 1843 } 1844 updateClipper()1845 void Renderer::updateClipper() 1846 { 1847 if(updateClipPlanes) 1848 { 1849 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0]; 1850 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1]; 1851 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2]; 1852 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3]; 1853 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4]; 1854 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5]; 1855 1856 updateClipPlanes = false; 1857 } 1858 } 1859 setTextureResource(unsigned int sampler,Resource * resource)1860 void Renderer::setTextureResource(unsigned int sampler, Resource *resource) 1861 { 1862 ASSERT(sampler < TOTAL_IMAGE_UNITS); 1863 1864 context->texture[sampler] = resource; 1865 } 1866 setTextureLevel(unsigned int sampler,unsigned int face,unsigned int level,Surface * surface,TextureType type)1867 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type) 1868 { 1869 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS); 1870 1871 context->sampler[sampler].setTextureLevel(face, level, surface, type); 1872 } 1873 setTextureFilter(SamplerType type,int sampler,FilterType textureFilter)1874 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter) 1875 { 1876 if(type == SAMPLER_PIXEL) 1877 { 1878 PixelProcessor::setTextureFilter(sampler, textureFilter); 1879 } 1880 else 1881 { 1882 VertexProcessor::setTextureFilter(sampler, textureFilter); 1883 } 1884 } 1885 setMipmapFilter(SamplerType type,int sampler,MipmapType mipmapFilter)1886 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter) 1887 { 1888 if(type == SAMPLER_PIXEL) 1889 { 1890 PixelProcessor::setMipmapFilter(sampler, mipmapFilter); 1891 } 1892 else 1893 { 1894 VertexProcessor::setMipmapFilter(sampler, mipmapFilter); 1895 } 1896 } 1897 setGatherEnable(SamplerType type,int sampler,bool enable)1898 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable) 1899 { 1900 if(type == SAMPLER_PIXEL) 1901 { 1902 PixelProcessor::setGatherEnable(sampler, enable); 1903 } 1904 else 1905 { 1906 VertexProcessor::setGatherEnable(sampler, enable); 1907 } 1908 } 1909 setAddressingModeU(SamplerType type,int sampler,AddressingMode addressMode)1910 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode) 1911 { 1912 if(type == SAMPLER_PIXEL) 1913 { 1914 PixelProcessor::setAddressingModeU(sampler, addressMode); 1915 } 1916 else 1917 { 1918 VertexProcessor::setAddressingModeU(sampler, addressMode); 1919 } 1920 } 1921 setAddressingModeV(SamplerType type,int sampler,AddressingMode addressMode)1922 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode) 1923 { 1924 if(type == SAMPLER_PIXEL) 1925 { 1926 PixelProcessor::setAddressingModeV(sampler, addressMode); 1927 } 1928 else 1929 { 1930 VertexProcessor::setAddressingModeV(sampler, addressMode); 1931 } 1932 } 1933 setAddressingModeW(SamplerType type,int sampler,AddressingMode addressMode)1934 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode) 1935 { 1936 if(type == SAMPLER_PIXEL) 1937 { 1938 PixelProcessor::setAddressingModeW(sampler, addressMode); 1939 } 1940 else 1941 { 1942 VertexProcessor::setAddressingModeW(sampler, addressMode); 1943 } 1944 } 1945 setReadSRGB(SamplerType type,int sampler,bool sRGB)1946 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB) 1947 { 1948 if(type == SAMPLER_PIXEL) 1949 { 1950 PixelProcessor::setReadSRGB(sampler, sRGB); 1951 } 1952 else 1953 { 1954 VertexProcessor::setReadSRGB(sampler, sRGB); 1955 } 1956 } 1957 setMipmapLOD(SamplerType type,int sampler,float bias)1958 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias) 1959 { 1960 if(type == SAMPLER_PIXEL) 1961 { 1962 PixelProcessor::setMipmapLOD(sampler, bias); 1963 } 1964 else 1965 { 1966 VertexProcessor::setMipmapLOD(sampler, bias); 1967 } 1968 } 1969 setBorderColor(SamplerType type,int sampler,const Color<float> & borderColor)1970 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor) 1971 { 1972 if(type == SAMPLER_PIXEL) 1973 { 1974 PixelProcessor::setBorderColor(sampler, borderColor); 1975 } 1976 else 1977 { 1978 VertexProcessor::setBorderColor(sampler, borderColor); 1979 } 1980 } 1981 setMaxAnisotropy(SamplerType type,int sampler,float maxAnisotropy)1982 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy) 1983 { 1984 if(type == SAMPLER_PIXEL) 1985 { 1986 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 1987 } 1988 else 1989 { 1990 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 1991 } 1992 } 1993 setHighPrecisionFiltering(SamplerType type,int sampler,bool highPrecisionFiltering)1994 void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering) 1995 { 1996 if(type == SAMPLER_PIXEL) 1997 { 1998 PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 1999 } 2000 else 2001 { 2002 VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 2003 } 2004 } 2005 setSwizzleR(SamplerType type,int sampler,SwizzleType swizzleR)2006 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR) 2007 { 2008 if(type == SAMPLER_PIXEL) 2009 { 2010 PixelProcessor::setSwizzleR(sampler, swizzleR); 2011 } 2012 else 2013 { 2014 VertexProcessor::setSwizzleR(sampler, swizzleR); 2015 } 2016 } 2017 setSwizzleG(SamplerType type,int sampler,SwizzleType swizzleG)2018 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG) 2019 { 2020 if(type == SAMPLER_PIXEL) 2021 { 2022 PixelProcessor::setSwizzleG(sampler, swizzleG); 2023 } 2024 else 2025 { 2026 VertexProcessor::setSwizzleG(sampler, swizzleG); 2027 } 2028 } 2029 setSwizzleB(SamplerType type,int sampler,SwizzleType swizzleB)2030 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB) 2031 { 2032 if(type == SAMPLER_PIXEL) 2033 { 2034 PixelProcessor::setSwizzleB(sampler, swizzleB); 2035 } 2036 else 2037 { 2038 VertexProcessor::setSwizzleB(sampler, swizzleB); 2039 } 2040 } 2041 setSwizzleA(SamplerType type,int sampler,SwizzleType swizzleA)2042 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA) 2043 { 2044 if(type == SAMPLER_PIXEL) 2045 { 2046 PixelProcessor::setSwizzleA(sampler, swizzleA); 2047 } 2048 else 2049 { 2050 VertexProcessor::setSwizzleA(sampler, swizzleA); 2051 } 2052 } 2053 setCompareFunc(SamplerType type,int sampler,CompareFunc compFunc)2054 void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc) 2055 { 2056 if(type == SAMPLER_PIXEL) 2057 { 2058 PixelProcessor::setCompareFunc(sampler, compFunc); 2059 } 2060 else 2061 { 2062 VertexProcessor::setCompareFunc(sampler, compFunc); 2063 } 2064 } 2065 setBaseLevel(SamplerType type,int sampler,int baseLevel)2066 void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel) 2067 { 2068 if(type == SAMPLER_PIXEL) 2069 { 2070 PixelProcessor::setBaseLevel(sampler, baseLevel); 2071 } 2072 else 2073 { 2074 VertexProcessor::setBaseLevel(sampler, baseLevel); 2075 } 2076 } 2077 setMaxLevel(SamplerType type,int sampler,int maxLevel)2078 void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel) 2079 { 2080 if(type == SAMPLER_PIXEL) 2081 { 2082 PixelProcessor::setMaxLevel(sampler, maxLevel); 2083 } 2084 else 2085 { 2086 VertexProcessor::setMaxLevel(sampler, maxLevel); 2087 } 2088 } 2089 setMinLod(SamplerType type,int sampler,float minLod)2090 void Renderer::setMinLod(SamplerType type, int sampler, float minLod) 2091 { 2092 if(type == SAMPLER_PIXEL) 2093 { 2094 PixelProcessor::setMinLod(sampler, minLod); 2095 } 2096 else 2097 { 2098 VertexProcessor::setMinLod(sampler, minLod); 2099 } 2100 } 2101 setMaxLod(SamplerType type,int sampler,float maxLod)2102 void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod) 2103 { 2104 if(type == SAMPLER_PIXEL) 2105 { 2106 PixelProcessor::setMaxLod(sampler, maxLod); 2107 } 2108 else 2109 { 2110 VertexProcessor::setMaxLod(sampler, maxLod); 2111 } 2112 } 2113 setLineWidth(float width)2114 void Renderer::setLineWidth(float width) 2115 { 2116 context->lineWidth = width; 2117 } 2118 setDepthBias(float bias)2119 void Renderer::setDepthBias(float bias) 2120 { 2121 context->depthBias = bias; 2122 } 2123 setSlopeDepthBias(float slopeBias)2124 void Renderer::setSlopeDepthBias(float slopeBias) 2125 { 2126 context->slopeDepthBias = slopeBias; 2127 } 2128 setRasterizerDiscard(bool rasterizerDiscard)2129 void Renderer::setRasterizerDiscard(bool rasterizerDiscard) 2130 { 2131 context->rasterizerDiscard = rasterizerDiscard; 2132 } 2133 setPixelShader(const PixelShader * shader)2134 void Renderer::setPixelShader(const PixelShader *shader) 2135 { 2136 context->pixelShader = shader; 2137 2138 loadConstants(shader); 2139 } 2140 setVertexShader(const VertexShader * shader)2141 void Renderer::setVertexShader(const VertexShader *shader) 2142 { 2143 context->vertexShader = shader; 2144 2145 loadConstants(shader); 2146 } 2147 setPixelShaderConstantF(unsigned int index,const float value[4],unsigned int count)2148 void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2149 { 2150 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2151 { 2152 if(drawCall[i]->psDirtyConstF < index + count) 2153 { 2154 drawCall[i]->psDirtyConstF = index + count; 2155 } 2156 } 2157 2158 for(unsigned int i = 0; i < count; i++) 2159 { 2160 PixelProcessor::setFloatConstant(index + i, value); 2161 value += 4; 2162 } 2163 } 2164 setPixelShaderConstantI(unsigned int index,const int value[4],unsigned int count)2165 void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2166 { 2167 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2168 { 2169 if(drawCall[i]->psDirtyConstI < index + count) 2170 { 2171 drawCall[i]->psDirtyConstI = index + count; 2172 } 2173 } 2174 2175 for(unsigned int i = 0; i < count; i++) 2176 { 2177 PixelProcessor::setIntegerConstant(index + i, value); 2178 value += 4; 2179 } 2180 } 2181 setPixelShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2182 void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2183 { 2184 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2185 { 2186 if(drawCall[i]->psDirtyConstB < index + count) 2187 { 2188 drawCall[i]->psDirtyConstB = index + count; 2189 } 2190 } 2191 2192 for(unsigned int i = 0; i < count; i++) 2193 { 2194 PixelProcessor::setBooleanConstant(index + i, *boolean); 2195 boolean++; 2196 } 2197 } 2198 setVertexShaderConstantF(unsigned int index,const float value[4],unsigned int count)2199 void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2200 { 2201 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2202 { 2203 if(drawCall[i]->vsDirtyConstF < index + count) 2204 { 2205 drawCall[i]->vsDirtyConstF = index + count; 2206 } 2207 } 2208 2209 for(unsigned int i = 0; i < count; i++) 2210 { 2211 VertexProcessor::setFloatConstant(index + i, value); 2212 value += 4; 2213 } 2214 } 2215 setVertexShaderConstantI(unsigned int index,const int value[4],unsigned int count)2216 void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2217 { 2218 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2219 { 2220 if(drawCall[i]->vsDirtyConstI < index + count) 2221 { 2222 drawCall[i]->vsDirtyConstI = index + count; 2223 } 2224 } 2225 2226 for(unsigned int i = 0; i < count; i++) 2227 { 2228 VertexProcessor::setIntegerConstant(index + i, value); 2229 value += 4; 2230 } 2231 } 2232 setVertexShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2233 void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2234 { 2235 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2236 { 2237 if(drawCall[i]->vsDirtyConstB < index + count) 2238 { 2239 drawCall[i]->vsDirtyConstB = index + count; 2240 } 2241 } 2242 2243 for(unsigned int i = 0; i < count; i++) 2244 { 2245 VertexProcessor::setBooleanConstant(index + i, *boolean); 2246 boolean++; 2247 } 2248 } 2249 addQuery(Query * query)2250 void Renderer::addQuery(Query *query) 2251 { 2252 queries.push_back(query); 2253 } 2254 removeQuery(Query * query)2255 void Renderer::removeQuery(Query *query) 2256 { 2257 queries.remove(query); 2258 } 2259 2260 #if PERF_HUD getThreadCount()2261 int Renderer::getThreadCount() 2262 { 2263 return threadCount; 2264 } 2265 getVertexTime(int thread)2266 int64_t Renderer::getVertexTime(int thread) 2267 { 2268 return vertexTime[thread]; 2269 } 2270 getSetupTime(int thread)2271 int64_t Renderer::getSetupTime(int thread) 2272 { 2273 return setupTime[thread]; 2274 } 2275 getPixelTime(int thread)2276 int64_t Renderer::getPixelTime(int thread) 2277 { 2278 return pixelTime[thread]; 2279 } 2280 resetTimers()2281 void Renderer::resetTimers() 2282 { 2283 for(int thread = 0; thread < threadCount; thread++) 2284 { 2285 vertexTime[thread] = 0; 2286 setupTime[thread] = 0; 2287 pixelTime[thread] = 0; 2288 } 2289 } 2290 #endif 2291 setContext(const sw::Context & context)2292 void Renderer::setContext(const sw::Context& context) 2293 { 2294 *(this->context) = context; 2295 } 2296 setViewport(const VkViewport & viewport)2297 void Renderer::setViewport(const VkViewport &viewport) 2298 { 2299 this->viewport = viewport; 2300 } 2301 setScissor(const Rect & scissor)2302 void Renderer::setScissor(const Rect &scissor) 2303 { 2304 this->scissor = scissor; 2305 } 2306 setClipFlags(int flags)2307 void Renderer::setClipFlags(int flags) 2308 { 2309 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum 2310 } 2311 setClipPlane(unsigned int index,const float plane[4])2312 void Renderer::setClipPlane(unsigned int index, const float plane[4]) 2313 { 2314 if(index < MAX_CLIP_PLANES) 2315 { 2316 userPlane[index] = plane; 2317 } 2318 else ASSERT(false); 2319 2320 updateClipPlanes = true; 2321 } 2322 updateConfiguration(bool initialUpdate)2323 void Renderer::updateConfiguration(bool initialUpdate) 2324 { 2325 bool newConfiguration = swiftConfig->hasNewConfiguration(); 2326 2327 if(newConfiguration || initialUpdate) 2328 { 2329 terminateThreads(); 2330 2331 SwiftConfig::Configuration configuration = {}; 2332 swiftConfig->getConfiguration(configuration); 2333 2334 precacheVertex = !newConfiguration && configuration.precache; 2335 precacheSetup = !newConfiguration && configuration.precache; 2336 precachePixel = !newConfiguration && configuration.precache; 2337 2338 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize); 2339 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize); 2340 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize); 2341 2342 switch(configuration.textureSampleQuality) 2343 { 2344 case 0: Sampler::setFilterQuality(FILTER_POINT); break; 2345 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break; 2346 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2347 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2348 } 2349 2350 switch(configuration.mipmapQuality) 2351 { 2352 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break; 2353 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2354 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2355 } 2356 2357 setPerspectiveCorrection(configuration.perspectiveCorrection); 2358 2359 switch(configuration.transcendentalPrecision) 2360 { 2361 case 0: 2362 logPrecision = APPROXIMATE; 2363 expPrecision = APPROXIMATE; 2364 rcpPrecision = APPROXIMATE; 2365 rsqPrecision = APPROXIMATE; 2366 break; 2367 case 1: 2368 logPrecision = PARTIAL; 2369 expPrecision = PARTIAL; 2370 rcpPrecision = PARTIAL; 2371 rsqPrecision = PARTIAL; 2372 break; 2373 case 2: 2374 logPrecision = ACCURATE; 2375 expPrecision = ACCURATE; 2376 rcpPrecision = ACCURATE; 2377 rsqPrecision = ACCURATE; 2378 break; 2379 case 3: 2380 logPrecision = WHQL; 2381 expPrecision = WHQL; 2382 rcpPrecision = WHQL; 2383 rsqPrecision = WHQL; 2384 break; 2385 case 4: 2386 logPrecision = IEEE; 2387 expPrecision = IEEE; 2388 rcpPrecision = IEEE; 2389 rsqPrecision = IEEE; 2390 break; 2391 default: 2392 logPrecision = ACCURATE; 2393 expPrecision = ACCURATE; 2394 rcpPrecision = ACCURATE; 2395 rsqPrecision = ACCURATE; 2396 break; 2397 } 2398 2399 switch(configuration.transparencyAntialiasing) 2400 { 2401 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2402 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break; 2403 default: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2404 } 2405 2406 switch(configuration.threadCount) 2407 { 2408 case -1: threadCount = CPUID::coreCount(); break; 2409 case 0: threadCount = CPUID::processAffinity(); break; 2410 default: threadCount = configuration.threadCount; break; 2411 } 2412 2413 CPUID::setEnableSSE4_1(configuration.enableSSE4_1); 2414 CPUID::setEnableSSSE3(configuration.enableSSSE3); 2415 CPUID::setEnableSSE3(configuration.enableSSE3); 2416 CPUID::setEnableSSE2(configuration.enableSSE2); 2417 CPUID::setEnableSSE(configuration.enableSSE); 2418 2419 for(int pass = 0; pass < 10; pass++) 2420 { 2421 optimization[pass] = configuration.optimization[pass]; 2422 } 2423 2424 forceWindowed = configuration.forceWindowed; 2425 complementaryDepthBuffer = configuration.complementaryDepthBuffer; 2426 postBlendSRGB = configuration.postBlendSRGB; 2427 exactColorRounding = configuration.exactColorRounding; 2428 forceClearRegisters = configuration.forceClearRegisters; 2429 2430 #ifndef NDEBUG 2431 minPrimitives = configuration.minPrimitives; 2432 maxPrimitives = configuration.maxPrimitives; 2433 #endif 2434 } 2435 2436 if(!initialUpdate && !worker[0]) 2437 { 2438 initializeThreads(); 2439 } 2440 } 2441 } 2442