1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Renderer.hpp" 16 17 #include "Clipper.hpp" 18 #include "Math.hpp" 19 #include "FrameBuffer.hpp" 20 #include "Timer.hpp" 21 #include "Surface.hpp" 22 #include "Half.hpp" 23 #include "Primitive.hpp" 24 #include "Polygon.hpp" 25 #include "SwiftConfig.hpp" 26 #include "MutexLock.hpp" 27 #include "CPUID.hpp" 28 #include "Memory.hpp" 29 #include "Resource.hpp" 30 #include "Constants.hpp" 31 #include "Debug.hpp" 32 #include "Reactor/Reactor.hpp" 33 34 #undef max 35 36 bool disableServer = true; 37 38 #ifndef NDEBUG 39 unsigned int minPrimitives = 1; 40 unsigned int maxPrimitives = 1 << 21; 41 #endif 42 43 namespace sw 44 { 45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 46 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] 47 extern bool booleanFaceRegister; 48 extern bool fullPixelPositionRegister; 49 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last 50 extern bool secondaryColor; // Specular lighting is applied after texturing 51 52 extern bool forceWindowed; 53 extern bool complementaryDepthBuffer; 54 extern bool postBlendSRGB; 55 extern bool exactColorRounding; 56 extern TransparencyAntialiasing transparencyAntialiasing; 57 extern bool forceClearRegisters; 58 59 extern bool precacheVertex; 60 extern bool precacheSetup; 61 extern bool precachePixel; 62 63 int batchSize = 128; 64 int threadCount = 1; 65 int unitCount = 1; 66 int clusterCount = 1; 67 68 TranscendentalPrecision logPrecision = ACCURATE; 69 TranscendentalPrecision expPrecision = ACCURATE; 70 TranscendentalPrecision rcpPrecision = ACCURATE; 71 TranscendentalPrecision rsqPrecision = ACCURATE; 72 bool perspectiveCorrection = true; 73 74 struct Parameters 75 { 76 Renderer *renderer; 77 int threadIndex; 78 }; 79 DrawCall()80 DrawCall::DrawCall() 81 { 82 queries = 0; 83 84 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 85 vsDirtyConstI = 16; 86 vsDirtyConstB = 16; 87 88 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS; 89 psDirtyConstI = 16; 90 psDirtyConstB = 16; 91 92 references = -1; 93 94 data = (DrawData*)allocate(sizeof(DrawData)); 95 data->constants = &constants; 96 } 97 ~DrawCall()98 DrawCall::~DrawCall() 99 { 100 delete queries; 101 102 deallocate(data); 103 } 104 Renderer(Context * context,Conventions conventions,bool exactColorRounding)105 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport() 106 { 107 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates; 108 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth; 109 sw::booleanFaceRegister = conventions.booleanFaceRegister; 110 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister; 111 sw::leadingVertexFirst = conventions.leadingVertexFirst; 112 sw::secondaryColor = conventions.secondaryColor; 113 sw::exactColorRounding = exactColorRounding; 114 115 setRenderTarget(0, 0); 116 clipper = new Clipper(symmetricNormalizedDepth); 117 118 updateViewMatrix = true; 119 updateBaseMatrix = true; 120 updateProjectionMatrix = true; 121 updateClipPlanes = true; 122 123 #if PERF_HUD 124 resetTimers(); 125 #endif 126 127 for(int i = 0; i < 16; i++) 128 { 129 vertexTask[i] = 0; 130 131 worker[i] = 0; 132 resume[i] = 0; 133 suspend[i] = 0; 134 } 135 136 threadsAwake = 0; 137 resumeApp = new Event(); 138 139 currentDraw = 0; 140 nextDraw = 0; 141 142 qHead = 0; 143 qSize = 0; 144 145 for(int i = 0; i < 16; i++) 146 { 147 triangleBatch[i] = 0; 148 primitiveBatch[i] = 0; 149 } 150 151 for(int draw = 0; draw < DRAW_COUNT; draw++) 152 { 153 drawCall[draw] = new DrawCall(); 154 drawList[draw] = drawCall[draw]; 155 } 156 157 for(int unit = 0; unit < 16; unit++) 158 { 159 primitiveProgress[unit].init(); 160 } 161 162 for(int cluster = 0; cluster < 16; cluster++) 163 { 164 pixelProgress[cluster].init(); 165 } 166 167 clipFlags = 0; 168 169 swiftConfig = new SwiftConfig(disableServer); 170 updateConfiguration(true); 171 172 sync = new Resource(0); 173 } 174 ~Renderer()175 Renderer::~Renderer() 176 { 177 sync->destruct(); 178 179 delete clipper; 180 clipper = 0; 181 182 terminateThreads(); 183 delete resumeApp; 184 185 for(int draw = 0; draw < DRAW_COUNT; draw++) 186 { 187 delete drawCall[draw]; 188 } 189 190 delete swiftConfig; 191 } 192 clear(void * pixel,Format format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)193 void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) 194 { 195 blitter.clear(pixel, format, dest, dRect, rgbaMask); 196 } 197 blit(Surface * source,const SliceRect & sRect,Surface * dest,const SliceRect & dRect,bool filter)198 void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter) 199 { 200 blitter.blit(source, sRect, dest, dRect, filter); 201 } 202 blit3D(Surface * source,Surface * dest)203 void Renderer::blit3D(Surface *source, Surface *dest) 204 { 205 blitter.blit3D(source, dest); 206 } 207 draw(DrawType drawType,unsigned int indexOffset,unsigned int count,bool update)208 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update) 209 { 210 #ifndef NDEBUG 211 if(count < minPrimitives || count > maxPrimitives) 212 { 213 return; 214 } 215 #endif 216 217 context->drawType = drawType; 218 219 updateConfiguration(); 220 updateClipper(); 221 222 int ss = context->getSuperSampleCount(); 223 int ms = context->getMultiSampleCount(); 224 225 for(int q = 0; q < ss; q++) 226 { 227 unsigned int oldMultiSampleMask = context->multiSampleMask; 228 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms)); 229 230 if(!context->multiSampleMask) 231 { 232 continue; 233 } 234 235 sync->lock(sw::PRIVATE); 236 237 Routine *vertexRoutine; 238 Routine *setupRoutine; 239 Routine *pixelRoutine; 240 241 if(update || oldMultiSampleMask != context->multiSampleMask) 242 { 243 vertexState = VertexProcessor::update(drawType); 244 setupState = SetupProcessor::update(); 245 pixelState = PixelProcessor::update(); 246 247 vertexRoutine = VertexProcessor::routine(vertexState); 248 setupRoutine = SetupProcessor::routine(setupState); 249 pixelRoutine = PixelProcessor::routine(pixelState); 250 } 251 252 int batch = batchSize / ms; 253 254 int (Renderer::*setupPrimitives)(int batch, int count); 255 256 if(context->isDrawTriangle()) 257 { 258 switch(context->fillMode) 259 { 260 case FILL_SOLID: 261 setupPrimitives = &Renderer::setupSolidTriangles; 262 break; 263 case FILL_WIREFRAME: 264 setupPrimitives = &Renderer::setupWireframeTriangle; 265 batch = 1; 266 break; 267 case FILL_VERTEX: 268 setupPrimitives = &Renderer::setupVertexTriangle; 269 batch = 1; 270 break; 271 default: ASSERT(false); 272 } 273 } 274 else if(context->isDrawLine()) 275 { 276 setupPrimitives = &Renderer::setupLines; 277 } 278 else // Point draw 279 { 280 setupPrimitives = &Renderer::setupPoints; 281 } 282 283 DrawCall *draw = 0; 284 285 do 286 { 287 for(int i = 0; i < DRAW_COUNT; i++) 288 { 289 if(drawCall[i]->references == -1) 290 { 291 draw = drawCall[i]; 292 drawList[nextDraw % DRAW_COUNT] = draw; 293 294 break; 295 } 296 } 297 298 if(!draw) 299 { 300 resumeApp->wait(); 301 } 302 } 303 while(!draw); 304 305 DrawData *data = draw->data; 306 307 if(queries.size() != 0) 308 { 309 draw->queries = new std::list<Query*>(); 310 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled; 311 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++) 312 { 313 Query* q = *query; 314 if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN)) 315 { 316 atomicIncrement(&(q->reference)); 317 draw->queries->push_back(q); 318 } 319 } 320 } 321 322 draw->drawType = drawType; 323 draw->batchSize = batch; 324 325 vertexRoutine->bind(); 326 setupRoutine->bind(); 327 pixelRoutine->bind(); 328 329 draw->vertexRoutine = vertexRoutine; 330 draw->setupRoutine = setupRoutine; 331 draw->pixelRoutine = pixelRoutine; 332 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry(); 333 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry(); 334 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry(); 335 draw->setupPrimitives = setupPrimitives; 336 draw->setupState = setupState; 337 338 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 339 { 340 draw->vertexStream[i] = context->input[i].resource; 341 data->input[i] = context->input[i].buffer; 342 data->stride[i] = context->input[i].stride; 343 344 if(draw->vertexStream[i]) 345 { 346 draw->vertexStream[i]->lock(PUBLIC, PRIVATE); 347 } 348 } 349 350 if(context->indexBuffer) 351 { 352 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset; 353 } 354 355 draw->indexBuffer = context->indexBuffer; 356 357 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++) 358 { 359 draw->texture[sampler] = 0; 360 } 361 362 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++) 363 { 364 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL) 365 { 366 draw->texture[sampler] = context->texture[sampler]; 367 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets 368 369 data->mipmap[sampler] = context->sampler[sampler].getTextureData(); 370 } 371 } 372 373 if(context->pixelShader) 374 { 375 if(draw->psDirtyConstF) 376 { 377 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8)); 378 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF); 379 draw->psDirtyConstF = 0; 380 } 381 382 if(draw->psDirtyConstI) 383 { 384 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI); 385 draw->psDirtyConstI = 0; 386 } 387 388 if(draw->psDirtyConstB) 389 { 390 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB); 391 draw->psDirtyConstB = 0; 392 } 393 394 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers); 395 } 396 else 397 { 398 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 399 { 400 draw->pUniformBuffers[i] = nullptr; 401 } 402 } 403 404 if(context->pixelShaderVersion() <= 0x0104) 405 { 406 for(int stage = 0; stage < 8; stage++) 407 { 408 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader) 409 { 410 data->textureStage[stage] = context->textureStage[stage].uniforms; 411 } 412 else break; 413 } 414 } 415 416 if(context->vertexShader) 417 { 418 if(context->vertexShader->getVersion() >= 0x0300) 419 { 420 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++) 421 { 422 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL) 423 { 424 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler]; 425 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE); 426 427 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData(); 428 } 429 } 430 } 431 432 if(draw->vsDirtyConstF) 433 { 434 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF); 435 draw->vsDirtyConstF = 0; 436 } 437 438 if(draw->vsDirtyConstI) 439 { 440 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI); 441 draw->vsDirtyConstI = 0; 442 } 443 444 if(draw->vsDirtyConstB) 445 { 446 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB); 447 draw->vsDirtyConstB = 0; 448 } 449 450 if(context->vertexShader->instanceIdDeclared) 451 { 452 data->instanceID = context->instanceID; 453 } 454 455 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers); 456 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers); 457 } 458 else 459 { 460 data->ff = ff; 461 462 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 463 draw->vsDirtyConstI = 16; 464 draw->vsDirtyConstB = 16; 465 466 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 467 { 468 draw->vUniformBuffers[i] = nullptr; 469 } 470 471 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 472 { 473 draw->transformFeedbackBuffers[i] = nullptr; 474 } 475 } 476 477 if(pixelState.stencilActive) 478 { 479 data->stencil[0] = stencil; 480 data->stencil[1] = stencilCCW; 481 } 482 483 if(pixelState.fogActive) 484 { 485 data->fog = fog; 486 } 487 488 if(setupState.isDrawPoint) 489 { 490 data->point = point; 491 } 492 493 data->lineWidth = context->lineWidth; 494 495 data->factor = factor; 496 497 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 498 { 499 float ref = context->alphaReference * (1.0f / 255.0f); 500 float margin = sw::min(ref, 1.0f - ref); 501 502 if(ms == 4) 503 { 504 data->a2c0 = replicate(ref - margin * 0.6f); 505 data->a2c1 = replicate(ref - margin * 0.2f); 506 data->a2c2 = replicate(ref + margin * 0.2f); 507 data->a2c3 = replicate(ref + margin * 0.6f); 508 } 509 else if(ms == 2) 510 { 511 data->a2c0 = replicate(ref - margin * 0.3f); 512 data->a2c1 = replicate(ref + margin * 0.3f); 513 } 514 else ASSERT(false); 515 } 516 517 if(pixelState.occlusionEnabled) 518 { 519 for(int cluster = 0; cluster < clusterCount; cluster++) 520 { 521 data->occlusion[cluster] = 0; 522 } 523 } 524 525 #if PERF_PROFILE 526 for(int cluster = 0; cluster < clusterCount; cluster++) 527 { 528 for(int i = 0; i < PERF_TIMERS; i++) 529 { 530 data->cycles[i][cluster] = 0; 531 } 532 } 533 #endif 534 535 // Viewport 536 { 537 float W = 0.5f * viewport.width; 538 float H = 0.5f * viewport.height; 539 float X0 = viewport.x0 + W; 540 float Y0 = viewport.y0 + H; 541 float N = viewport.minZ; 542 float F = viewport.maxZ; 543 float Z = F - N; 544 545 if(context->isDrawTriangle(false)) 546 { 547 N += depthBias; 548 } 549 550 if(complementaryDepthBuffer) 551 { 552 Z = -Z; 553 N = 1 - N; 554 } 555 556 static const float X[5][16] = // Fragment offsets 557 { 558 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 559 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 560 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 561 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples 562 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples 563 }; 564 565 static const float Y[5][16] = // Fragment offsets 566 { 567 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 568 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 569 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 570 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples 571 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples 572 }; 573 574 int s = sw::log2(ss); 575 576 data->Wx16 = replicate(W * 16); 577 data->Hx16 = replicate(H * 16); 578 data->X0x16 = replicate(X0 * 16 - 8); 579 data->Y0x16 = replicate(Y0 * 16 - 8); 580 data->XXXX = replicate(X[s][q] / W); 581 data->YYYY = replicate(Y[s][q] / H); 582 data->halfPixelX = replicate(0.5f / W); 583 data->halfPixelY = replicate(0.5f / H); 584 data->viewportHeight = abs(viewport.height); 585 data->slopeDepthBias = slopeDepthBias; 586 data->depthRange = Z; 587 data->depthNear = N; 588 draw->clipFlags = clipFlags; 589 590 if(clipFlags) 591 { 592 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0]; 593 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1]; 594 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2]; 595 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3]; 596 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4]; 597 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5]; 598 } 599 } 600 601 // Target 602 { 603 for(int index = 0; index < RENDERTARGETS; index++) 604 { 605 draw->renderTarget[index] = context->renderTarget[index]; 606 607 if(draw->renderTarget[index]) 608 { 609 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED); 610 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB(); 611 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB(); 612 } 613 } 614 615 draw->depthBuffer = context->depthBuffer; 616 draw->stencilBuffer = context->stencilBuffer; 617 618 if(draw->depthBuffer) 619 { 620 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED); 621 data->depthPitchB = context->depthBuffer->getInternalPitchB(); 622 data->depthSliceB = context->depthBuffer->getInternalSliceB(); 623 } 624 625 if(draw->stencilBuffer) 626 { 627 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(q * ms, MANAGED); 628 data->stencilPitchB = context->stencilBuffer->getStencilPitchB(); 629 data->stencilSliceB = context->stencilBuffer->getStencilSliceB(); 630 } 631 } 632 633 // Scissor 634 { 635 data->scissorX0 = scissor.x0; 636 data->scissorX1 = scissor.x1; 637 data->scissorY0 = scissor.y0; 638 data->scissorY1 = scissor.y1; 639 } 640 641 draw->primitive = 0; 642 draw->count = count; 643 644 draw->references = (count + batch - 1) / batch; 645 646 schedulerMutex.lock(); 647 nextDraw++; 648 schedulerMutex.unlock(); 649 650 if(threadCount > 1) 651 { 652 if(!threadsAwake) 653 { 654 suspend[0]->wait(); 655 656 threadsAwake = 1; 657 task[0].type = Task::RESUME; 658 659 resume[0]->signal(); 660 } 661 } 662 else // Use main thread for draw execution 663 { 664 threadsAwake = 1; 665 task[0].type = Task::RESUME; 666 667 taskLoop(0); 668 } 669 } 670 } 671 threadFunction(void * parameters)672 void Renderer::threadFunction(void *parameters) 673 { 674 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer; 675 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex; 676 677 if(logPrecision < IEEE) 678 { 679 CPUID::setFlushToZero(true); 680 CPUID::setDenormalsAreZero(true); 681 } 682 683 renderer->threadLoop(threadIndex); 684 } 685 threadLoop(int threadIndex)686 void Renderer::threadLoop(int threadIndex) 687 { 688 while(!exitThreads) 689 { 690 taskLoop(threadIndex); 691 692 suspend[threadIndex]->signal(); 693 resume[threadIndex]->wait(); 694 } 695 } 696 taskLoop(int threadIndex)697 void Renderer::taskLoop(int threadIndex) 698 { 699 while(task[threadIndex].type != Task::SUSPEND) 700 { 701 scheduleTask(threadIndex); 702 executeTask(threadIndex); 703 } 704 } 705 findAvailableTasks()706 void Renderer::findAvailableTasks() 707 { 708 // Find pixel tasks 709 for(int cluster = 0; cluster < clusterCount; cluster++) 710 { 711 if(!pixelProgress[cluster].executing) 712 { 713 for(int unit = 0; unit < unitCount; unit++) 714 { 715 if(primitiveProgress[unit].references > 0) // Contains processed primitives 716 { 717 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall) 718 { 719 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered 720 { 721 Task &task = taskQueue[qHead]; 722 task.type = Task::PIXELS; 723 task.primitiveUnit = unit; 724 task.pixelCluster = cluster; 725 726 pixelProgress[cluster].executing = true; 727 728 // Commit to the task queue 729 qHead = (qHead + 1) % 32; 730 qSize++; 731 732 break; 733 } 734 } 735 } 736 } 737 } 738 } 739 740 // Find primitive tasks 741 if(currentDraw == nextDraw) 742 { 743 return; // No more primitives to process 744 } 745 746 for(int unit = 0; unit < unitCount; unit++) 747 { 748 DrawCall *draw = drawList[currentDraw % DRAW_COUNT]; 749 750 if(draw->primitive >= draw->count) 751 { 752 currentDraw++; 753 754 if(currentDraw == nextDraw) 755 { 756 return; // No more primitives to process 757 } 758 759 draw = drawList[currentDraw % DRAW_COUNT]; 760 } 761 762 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit 763 { 764 int primitive = draw->primitive; 765 int count = draw->count; 766 int batch = draw->batchSize; 767 768 primitiveProgress[unit].drawCall = currentDraw; 769 primitiveProgress[unit].firstPrimitive = primitive; 770 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive; 771 772 draw->primitive += batch; 773 774 Task &task = taskQueue[qHead]; 775 task.type = Task::PRIMITIVES; 776 task.primitiveUnit = unit; 777 778 primitiveProgress[unit].references = -1; 779 780 // Commit to the task queue 781 qHead = (qHead + 1) % 32; 782 qSize++; 783 } 784 } 785 } 786 scheduleTask(int threadIndex)787 void Renderer::scheduleTask(int threadIndex) 788 { 789 schedulerMutex.lock(); 790 791 if((int)qSize < threadCount - threadsAwake + 1) 792 { 793 findAvailableTasks(); 794 } 795 796 if(qSize != 0) 797 { 798 task[threadIndex] = taskQueue[(qHead - qSize) % 32]; 799 qSize--; 800 801 if(threadsAwake != threadCount) 802 { 803 int wakeup = qSize - threadsAwake + 1; 804 805 for(int i = 0; i < threadCount && wakeup > 0; i++) 806 { 807 if(task[i].type == Task::SUSPEND) 808 { 809 suspend[i]->wait(); 810 task[i].type = Task::RESUME; 811 resume[i]->signal(); 812 813 threadsAwake++; 814 wakeup--; 815 } 816 } 817 } 818 } 819 else 820 { 821 task[threadIndex].type = Task::SUSPEND; 822 823 threadsAwake--; 824 } 825 826 schedulerMutex.unlock(); 827 } 828 executeTask(int threadIndex)829 void Renderer::executeTask(int threadIndex) 830 { 831 #if PERF_HUD 832 int64_t startTick = Timer::ticks(); 833 #endif 834 835 switch(task[threadIndex].type) 836 { 837 case Task::PRIMITIVES: 838 { 839 int unit = task[threadIndex].primitiveUnit; 840 841 int input = primitiveProgress[unit].firstPrimitive; 842 int count = primitiveProgress[unit].primitiveCount; 843 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 844 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives; 845 846 processPrimitiveVertices(unit, input, count, draw->count, threadIndex); 847 848 #if PERF_HUD 849 int64_t time = Timer::ticks(); 850 vertexTime[threadIndex] += time - startTick; 851 startTick = time; 852 #endif 853 854 int visible = 0; 855 856 if(!draw->setupState.rasterizerDiscard) 857 { 858 visible = (this->*setupPrimitives)(unit, count); 859 } 860 861 primitiveProgress[unit].visible = visible; 862 primitiveProgress[unit].references = clusterCount; 863 864 #if PERF_HUD 865 setupTime[threadIndex] += Timer::ticks() - startTick; 866 #endif 867 } 868 break; 869 case Task::PIXELS: 870 { 871 int unit = task[threadIndex].primitiveUnit; 872 int visible = primitiveProgress[unit].visible; 873 874 if(visible > 0) 875 { 876 int cluster = task[threadIndex].pixelCluster; 877 Primitive *primitive = primitiveBatch[unit]; 878 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT]; 879 DrawData *data = draw->data; 880 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer; 881 882 pixelRoutine(primitive, visible, cluster, data); 883 } 884 885 finishRendering(task[threadIndex]); 886 887 #if PERF_HUD 888 pixelTime[threadIndex] += Timer::ticks() - startTick; 889 #endif 890 } 891 break; 892 case Task::RESUME: 893 break; 894 case Task::SUSPEND: 895 break; 896 default: 897 ASSERT(false); 898 } 899 } 900 synchronize()901 void Renderer::synchronize() 902 { 903 sync->lock(sw::PUBLIC); 904 sync->unlock(); 905 } 906 finishRendering(Task & pixelTask)907 void Renderer::finishRendering(Task &pixelTask) 908 { 909 int unit = pixelTask.primitiveUnit; 910 int cluster = pixelTask.pixelCluster; 911 912 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 913 DrawData &data = *draw.data; 914 int primitive = primitiveProgress[unit].firstPrimitive; 915 int count = primitiveProgress[unit].primitiveCount; 916 int processedPrimitives = primitive + count; 917 918 pixelProgress[cluster].processedPrimitives = processedPrimitives; 919 920 if(pixelProgress[cluster].processedPrimitives >= draw.count) 921 { 922 pixelProgress[cluster].drawCall++; 923 pixelProgress[cluster].processedPrimitives = 0; 924 } 925 926 int ref = atomicDecrement(&primitiveProgress[unit].references); 927 928 if(ref == 0) 929 { 930 ref = atomicDecrement(&draw.references); 931 932 if(ref == 0) 933 { 934 #if PERF_PROFILE 935 for(int cluster = 0; cluster < clusterCount; cluster++) 936 { 937 for(int i = 0; i < PERF_TIMERS; i++) 938 { 939 profiler.cycles[i] += data.cycles[i][cluster]; 940 } 941 } 942 #endif 943 944 if(draw.queries) 945 { 946 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++) 947 { 948 Query *query = *q; 949 950 switch(query->type) 951 { 952 case Query::FRAGMENTS_PASSED: 953 for(int cluster = 0; cluster < clusterCount; cluster++) 954 { 955 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]); 956 } 957 break; 958 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: 959 atomicAdd((volatile int*)&query->data, processedPrimitives); 960 break; 961 default: 962 break; 963 } 964 965 atomicDecrement(&query->reference); 966 } 967 968 delete draw.queries; 969 draw.queries = 0; 970 } 971 972 for(int i = 0; i < RENDERTARGETS; i++) 973 { 974 if(draw.renderTarget[i]) 975 { 976 draw.renderTarget[i]->unlockInternal(); 977 } 978 } 979 980 if(draw.depthBuffer) 981 { 982 draw.depthBuffer->unlockInternal(); 983 } 984 985 if(draw.stencilBuffer) 986 { 987 draw.stencilBuffer->unlockStencil(); 988 } 989 990 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++) 991 { 992 if(draw.texture[i]) 993 { 994 draw.texture[i]->unlock(); 995 } 996 } 997 998 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 999 { 1000 if(draw.vertexStream[i]) 1001 { 1002 draw.vertexStream[i]->unlock(); 1003 } 1004 } 1005 1006 if(draw.indexBuffer) 1007 { 1008 draw.indexBuffer->unlock(); 1009 } 1010 1011 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 1012 { 1013 if(draw.pUniformBuffers[i]) 1014 { 1015 draw.pUniformBuffers[i]->unlock(); 1016 } 1017 if(draw.vUniformBuffers[i]) 1018 { 1019 draw.vUniformBuffers[i]->unlock(); 1020 } 1021 } 1022 1023 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 1024 { 1025 if(draw.transformFeedbackBuffers[i]) 1026 { 1027 draw.transformFeedbackBuffers[i]->unlock(); 1028 } 1029 } 1030 1031 draw.vertexRoutine->unbind(); 1032 draw.setupRoutine->unbind(); 1033 draw.pixelRoutine->unbind(); 1034 1035 sync->unlock(); 1036 1037 draw.references = -1; 1038 resumeApp->signal(); 1039 } 1040 } 1041 1042 pixelProgress[cluster].executing = false; 1043 } 1044 processPrimitiveVertices(int unit,unsigned int start,unsigned int triangleCount,unsigned int loop,int thread)1045 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread) 1046 { 1047 Triangle *triangle = triangleBatch[unit]; 1048 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1049 DrawData *data = draw->data; 1050 VertexTask *task = vertexTask[thread]; 1051 1052 const void *indices = data->indices; 1053 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer; 1054 1055 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall) 1056 { 1057 task->vertexCache.clear(); 1058 task->vertexCache.drawCall = primitiveProgress[unit].drawCall; 1059 } 1060 1061 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size 1062 1063 switch(draw->drawType) 1064 { 1065 case DRAW_POINTLIST: 1066 { 1067 unsigned int index = start; 1068 1069 for(unsigned int i = 0; i < triangleCount; i++) 1070 { 1071 batch[i][0] = index; 1072 batch[i][1] = index; 1073 batch[i][2] = index; 1074 1075 index += 1; 1076 } 1077 } 1078 break; 1079 case DRAW_LINELIST: 1080 { 1081 unsigned int index = 2 * start; 1082 1083 for(unsigned int i = 0; i < triangleCount; i++) 1084 { 1085 batch[i][0] = index + 0; 1086 batch[i][1] = index + 1; 1087 batch[i][2] = index + 1; 1088 1089 index += 2; 1090 } 1091 } 1092 break; 1093 case DRAW_LINESTRIP: 1094 { 1095 unsigned int index = start; 1096 1097 for(unsigned int i = 0; i < triangleCount; i++) 1098 { 1099 batch[i][0] = index + 0; 1100 batch[i][1] = index + 1; 1101 batch[i][2] = index + 1; 1102 1103 index += 1; 1104 } 1105 } 1106 break; 1107 case DRAW_LINELOOP: 1108 { 1109 unsigned int index = start; 1110 1111 for(unsigned int i = 0; i < triangleCount; i++) 1112 { 1113 batch[i][0] = (index + 0) % loop; 1114 batch[i][1] = (index + 1) % loop; 1115 batch[i][2] = (index + 1) % loop; 1116 1117 index += 1; 1118 } 1119 } 1120 break; 1121 case DRAW_TRIANGLELIST: 1122 { 1123 unsigned int index = 3 * start; 1124 1125 for(unsigned int i = 0; i < triangleCount; i++) 1126 { 1127 batch[i][0] = index + 0; 1128 batch[i][1] = index + 1; 1129 batch[i][2] = index + 2; 1130 1131 index += 3; 1132 } 1133 } 1134 break; 1135 case DRAW_TRIANGLESTRIP: 1136 { 1137 unsigned int index = start; 1138 1139 for(unsigned int i = 0; i < triangleCount; i++) 1140 { 1141 batch[i][0] = index + 0; 1142 batch[i][1] = index + (index & 1) + 1; 1143 batch[i][2] = index + (~index & 1) + 1; 1144 1145 index += 1; 1146 } 1147 } 1148 break; 1149 case DRAW_TRIANGLEFAN: 1150 { 1151 unsigned int index = start; 1152 1153 for(unsigned int i = 0; i < triangleCount; i++) 1154 { 1155 batch[i][0] = index + 1; 1156 batch[i][1] = index + 2; 1157 batch[i][2] = 0; 1158 1159 index += 1; 1160 } 1161 } 1162 break; 1163 case DRAW_INDEXEDPOINTLIST8: 1164 { 1165 const unsigned char *index = (const unsigned char*)indices + start; 1166 1167 for(unsigned int i = 0; i < triangleCount; i++) 1168 { 1169 batch[i][0] = *index; 1170 batch[i][1] = *index; 1171 batch[i][2] = *index; 1172 1173 index += 1; 1174 } 1175 } 1176 break; 1177 case DRAW_INDEXEDPOINTLIST16: 1178 { 1179 const unsigned short *index = (const unsigned short*)indices + start; 1180 1181 for(unsigned int i = 0; i < triangleCount; i++) 1182 { 1183 batch[i][0] = *index; 1184 batch[i][1] = *index; 1185 batch[i][2] = *index; 1186 1187 index += 1; 1188 } 1189 } 1190 break; 1191 case DRAW_INDEXEDPOINTLIST32: 1192 { 1193 const unsigned int *index = (const unsigned int*)indices + start; 1194 1195 for(unsigned int i = 0; i < triangleCount; i++) 1196 { 1197 batch[i][0] = *index; 1198 batch[i][1] = *index; 1199 batch[i][2] = *index; 1200 1201 index += 1; 1202 } 1203 } 1204 break; 1205 case DRAW_INDEXEDLINELIST8: 1206 { 1207 const unsigned char *index = (const unsigned char*)indices + 2 * start; 1208 1209 for(unsigned int i = 0; i < triangleCount; i++) 1210 { 1211 batch[i][0] = index[0]; 1212 batch[i][1] = index[1]; 1213 batch[i][2] = index[1]; 1214 1215 index += 2; 1216 } 1217 } 1218 break; 1219 case DRAW_INDEXEDLINELIST16: 1220 { 1221 const unsigned short *index = (const unsigned short*)indices + 2 * start; 1222 1223 for(unsigned int i = 0; i < triangleCount; i++) 1224 { 1225 batch[i][0] = index[0]; 1226 batch[i][1] = index[1]; 1227 batch[i][2] = index[1]; 1228 1229 index += 2; 1230 } 1231 } 1232 break; 1233 case DRAW_INDEXEDLINELIST32: 1234 { 1235 const unsigned int *index = (const unsigned int*)indices + 2 * start; 1236 1237 for(unsigned int i = 0; i < triangleCount; i++) 1238 { 1239 batch[i][0] = index[0]; 1240 batch[i][1] = index[1]; 1241 batch[i][2] = index[1]; 1242 1243 index += 2; 1244 } 1245 } 1246 break; 1247 case DRAW_INDEXEDLINESTRIP8: 1248 { 1249 const unsigned char *index = (const unsigned char*)indices + start; 1250 1251 for(unsigned int i = 0; i < triangleCount; i++) 1252 { 1253 batch[i][0] = index[0]; 1254 batch[i][1] = index[1]; 1255 batch[i][2] = index[1]; 1256 1257 index += 1; 1258 } 1259 } 1260 break; 1261 case DRAW_INDEXEDLINESTRIP16: 1262 { 1263 const unsigned short *index = (const unsigned short*)indices + start; 1264 1265 for(unsigned int i = 0; i < triangleCount; i++) 1266 { 1267 batch[i][0] = index[0]; 1268 batch[i][1] = index[1]; 1269 batch[i][2] = index[1]; 1270 1271 index += 1; 1272 } 1273 } 1274 break; 1275 case DRAW_INDEXEDLINESTRIP32: 1276 { 1277 const unsigned int *index = (const unsigned int*)indices + start; 1278 1279 for(unsigned int i = 0; i < triangleCount; i++) 1280 { 1281 batch[i][0] = index[0]; 1282 batch[i][1] = index[1]; 1283 batch[i][2] = index[1]; 1284 1285 index += 1; 1286 } 1287 } 1288 break; 1289 case DRAW_INDEXEDLINELOOP8: 1290 { 1291 const unsigned char *index = (const unsigned char*)indices; 1292 1293 for(unsigned int i = 0; i < triangleCount; i++) 1294 { 1295 batch[i][0] = index[(start + i + 0) % loop]; 1296 batch[i][1] = index[(start + i + 1) % loop]; 1297 batch[i][2] = index[(start + i + 1) % loop]; 1298 } 1299 } 1300 break; 1301 case DRAW_INDEXEDLINELOOP16: 1302 { 1303 const unsigned short *index = (const unsigned short*)indices; 1304 1305 for(unsigned int i = 0; i < triangleCount; i++) 1306 { 1307 batch[i][0] = index[(start + i + 0) % loop]; 1308 batch[i][1] = index[(start + i + 1) % loop]; 1309 batch[i][2] = index[(start + i + 1) % loop]; 1310 } 1311 } 1312 break; 1313 case DRAW_INDEXEDLINELOOP32: 1314 { 1315 const unsigned int *index = (const unsigned int*)indices; 1316 1317 for(unsigned int i = 0; i < triangleCount; i++) 1318 { 1319 batch[i][0] = index[(start + i + 0) % loop]; 1320 batch[i][1] = index[(start + i + 1) % loop]; 1321 batch[i][2] = index[(start + i + 1) % loop]; 1322 } 1323 } 1324 break; 1325 case DRAW_INDEXEDTRIANGLELIST8: 1326 { 1327 const unsigned char *index = (const unsigned char*)indices + 3 * start; 1328 1329 for(unsigned int i = 0; i < triangleCount; i++) 1330 { 1331 batch[i][0] = index[0]; 1332 batch[i][1] = index[1]; 1333 batch[i][2] = index[2]; 1334 1335 index += 3; 1336 } 1337 } 1338 break; 1339 case DRAW_INDEXEDTRIANGLELIST16: 1340 { 1341 const unsigned short *index = (const unsigned short*)indices + 3 * start; 1342 1343 for(unsigned int i = 0; i < triangleCount; i++) 1344 { 1345 batch[i][0] = index[0]; 1346 batch[i][1] = index[1]; 1347 batch[i][2] = index[2]; 1348 1349 index += 3; 1350 } 1351 } 1352 break; 1353 case DRAW_INDEXEDTRIANGLELIST32: 1354 { 1355 const unsigned int *index = (const unsigned int*)indices + 3 * start; 1356 1357 for(unsigned int i = 0; i < triangleCount; i++) 1358 { 1359 batch[i][0] = index[0]; 1360 batch[i][1] = index[1]; 1361 batch[i][2] = index[2]; 1362 1363 index += 3; 1364 } 1365 } 1366 break; 1367 case DRAW_INDEXEDTRIANGLESTRIP8: 1368 { 1369 const unsigned char *index = (const unsigned char*)indices + start; 1370 1371 for(unsigned int i = 0; i < triangleCount; i++) 1372 { 1373 batch[i][0] = index[0]; 1374 batch[i][1] = index[((start + i) & 1) + 1]; 1375 batch[i][2] = index[(~(start + i) & 1) + 1]; 1376 1377 index += 1; 1378 } 1379 } 1380 break; 1381 case DRAW_INDEXEDTRIANGLESTRIP16: 1382 { 1383 const unsigned short *index = (const unsigned short*)indices + start; 1384 1385 for(unsigned int i = 0; i < triangleCount; i++) 1386 { 1387 batch[i][0] = index[0]; 1388 batch[i][1] = index[((start + i) & 1) + 1]; 1389 batch[i][2] = index[(~(start + i) & 1) + 1]; 1390 1391 index += 1; 1392 } 1393 } 1394 break; 1395 case DRAW_INDEXEDTRIANGLESTRIP32: 1396 { 1397 const unsigned int *index = (const unsigned int*)indices + start; 1398 1399 for(unsigned int i = 0; i < triangleCount; i++) 1400 { 1401 batch[i][0] = index[0]; 1402 batch[i][1] = index[((start + i) & 1) + 1]; 1403 batch[i][2] = index[(~(start + i) & 1) + 1]; 1404 1405 index += 1; 1406 } 1407 } 1408 break; 1409 case DRAW_INDEXEDTRIANGLEFAN8: 1410 { 1411 const unsigned char *index = (const unsigned char*)indices; 1412 1413 for(unsigned int i = 0; i < triangleCount; i++) 1414 { 1415 batch[i][0] = index[start + i + 1]; 1416 batch[i][1] = index[start + i + 2]; 1417 batch[i][2] = index[0]; 1418 } 1419 } 1420 break; 1421 case DRAW_INDEXEDTRIANGLEFAN16: 1422 { 1423 const unsigned short *index = (const unsigned short*)indices; 1424 1425 for(unsigned int i = 0; i < triangleCount; i++) 1426 { 1427 batch[i][0] = index[start + i + 1]; 1428 batch[i][1] = index[start + i + 2]; 1429 batch[i][2] = index[0]; 1430 } 1431 } 1432 break; 1433 case DRAW_INDEXEDTRIANGLEFAN32: 1434 { 1435 const unsigned int *index = (const unsigned int*)indices; 1436 1437 for(unsigned int i = 0; i < triangleCount; i++) 1438 { 1439 batch[i][0] = index[start + i + 1]; 1440 batch[i][1] = index[start + i + 2]; 1441 batch[i][2] = index[0]; 1442 } 1443 } 1444 break; 1445 case DRAW_QUADLIST: 1446 { 1447 unsigned int index = 4 * start / 2; 1448 1449 for(unsigned int i = 0; i < triangleCount; i += 2) 1450 { 1451 batch[i+0][0] = index + 0; 1452 batch[i+0][1] = index + 1; 1453 batch[i+0][2] = index + 2; 1454 1455 batch[i+1][0] = index + 0; 1456 batch[i+1][1] = index + 2; 1457 batch[i+1][2] = index + 3; 1458 1459 index += 4; 1460 } 1461 } 1462 break; 1463 default: 1464 ASSERT(false); 1465 return; 1466 } 1467 1468 task->primitiveStart = start; 1469 task->vertexCount = triangleCount * 3; 1470 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data); 1471 } 1472 setupSolidTriangles(int unit,int count)1473 int Renderer::setupSolidTriangles(int unit, int count) 1474 { 1475 Triangle *triangle = triangleBatch[unit]; 1476 Primitive *primitive = primitiveBatch[unit]; 1477 1478 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1479 SetupProcessor::State &state = draw.setupState; 1480 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1481 1482 int ms = state.multiSample; 1483 int pos = state.positionRegister; 1484 const DrawData *data = draw.data; 1485 int visible = 0; 1486 1487 for(int i = 0; i < count; i++, triangle++) 1488 { 1489 Vertex &v0 = triangle->v0; 1490 Vertex &v1 = triangle->v1; 1491 Vertex &v2 = triangle->v2; 1492 1493 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) 1494 { 1495 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]); 1496 1497 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags; 1498 1499 if(clipFlagsOr != Clipper::CLIP_FINITE) 1500 { 1501 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1502 { 1503 continue; 1504 } 1505 } 1506 1507 if(setupRoutine(primitive, triangle, &polygon, data)) 1508 { 1509 primitive += ms; 1510 visible++; 1511 } 1512 } 1513 } 1514 1515 return visible; 1516 } 1517 setupWireframeTriangle(int unit,int count)1518 int Renderer::setupWireframeTriangle(int unit, int count) 1519 { 1520 Triangle *triangle = triangleBatch[unit]; 1521 Primitive *primitive = primitiveBatch[unit]; 1522 int visible = 0; 1523 1524 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1525 SetupProcessor::State &state = draw.setupState; 1526 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer; 1527 1528 const Vertex &v0 = triangle[0].v0; 1529 const Vertex &v1 = triangle[0].v1; 1530 const Vertex &v2 = triangle[0].v2; 1531 1532 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; 1533 1534 if(state.cullMode == CULL_CLOCKWISE) 1535 { 1536 if(d >= 0) return 0; 1537 } 1538 else if(state.cullMode == CULL_COUNTERCLOCKWISE) 1539 { 1540 if(d <= 0) return 0; 1541 } 1542 1543 // Copy attributes 1544 triangle[1].v0 = v1; 1545 triangle[1].v1 = v2; 1546 triangle[2].v0 = v2; 1547 triangle[2].v1 = v0; 1548 1549 if(state.color[0][0].flat) // FIXME 1550 { 1551 for(int i = 0; i < 2; i++) 1552 { 1553 triangle[1].v0.C[i] = triangle[0].v0.C[i]; 1554 triangle[1].v1.C[i] = triangle[0].v0.C[i]; 1555 triangle[2].v0.C[i] = triangle[0].v0.C[i]; 1556 triangle[2].v1.C[i] = triangle[0].v0.C[i]; 1557 } 1558 } 1559 1560 for(int i = 0; i < 3; i++) 1561 { 1562 if(setupLine(*primitive, *triangle, draw)) 1563 { 1564 primitive->area = 0.5f * d; 1565 1566 primitive++; 1567 visible++; 1568 } 1569 1570 triangle++; 1571 } 1572 1573 return visible; 1574 } 1575 setupVertexTriangle(int unit,int count)1576 int Renderer::setupVertexTriangle(int unit, int count) 1577 { 1578 Triangle *triangle = triangleBatch[unit]; 1579 Primitive *primitive = primitiveBatch[unit]; 1580 int visible = 0; 1581 1582 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1583 SetupProcessor::State &state = draw.setupState; 1584 1585 const Vertex &v0 = triangle[0].v0; 1586 const Vertex &v1 = triangle[0].v1; 1587 const Vertex &v2 = triangle[0].v2; 1588 1589 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; 1590 1591 if(state.cullMode == CULL_CLOCKWISE) 1592 { 1593 if(d >= 0) return 0; 1594 } 1595 else if(state.cullMode == CULL_COUNTERCLOCKWISE) 1596 { 1597 if(d <= 0) return 0; 1598 } 1599 1600 // Copy attributes 1601 triangle[1].v0 = v1; 1602 triangle[2].v0 = v2; 1603 1604 for(int i = 0; i < 3; i++) 1605 { 1606 if(setupPoint(*primitive, *triangle, draw)) 1607 { 1608 primitive->area = 0.5f * d; 1609 1610 primitive++; 1611 visible++; 1612 } 1613 1614 triangle++; 1615 } 1616 1617 return visible; 1618 } 1619 setupLines(int unit,int count)1620 int Renderer::setupLines(int unit, int count) 1621 { 1622 Triangle *triangle = triangleBatch[unit]; 1623 Primitive *primitive = primitiveBatch[unit]; 1624 int visible = 0; 1625 1626 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1627 SetupProcessor::State &state = draw.setupState; 1628 1629 int ms = state.multiSample; 1630 1631 for(int i = 0; i < count; i++) 1632 { 1633 if(setupLine(*primitive, *triangle, draw)) 1634 { 1635 primitive += ms; 1636 visible++; 1637 } 1638 1639 triangle++; 1640 } 1641 1642 return visible; 1643 } 1644 setupPoints(int unit,int count)1645 int Renderer::setupPoints(int unit, int count) 1646 { 1647 Triangle *triangle = triangleBatch[unit]; 1648 Primitive *primitive = primitiveBatch[unit]; 1649 int visible = 0; 1650 1651 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1652 SetupProcessor::State &state = draw.setupState; 1653 1654 int ms = state.multiSample; 1655 1656 for(int i = 0; i < count; i++) 1657 { 1658 if(setupPoint(*primitive, *triangle, draw)) 1659 { 1660 primitive += ms; 1661 visible++; 1662 } 1663 1664 triangle++; 1665 } 1666 1667 return visible; 1668 } 1669 setupLine(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1670 bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 1671 { 1672 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1673 const SetupProcessor::State &state = draw.setupState; 1674 const DrawData &data = *draw.data; 1675 1676 float lineWidth = data.lineWidth; 1677 1678 Vertex &v0 = triangle.v0; 1679 Vertex &v1 = triangle.v1; 1680 1681 int pos = state.positionRegister; 1682 1683 const float4 &P0 = v0.v[pos]; 1684 const float4 &P1 = v1.v[pos]; 1685 1686 if(P0.w <= 0 && P1.w <= 0) 1687 { 1688 return false; 1689 } 1690 1691 const float W = data.Wx16[0] * (1.0f / 16.0f); 1692 const float H = data.Hx16[0] * (1.0f / 16.0f); 1693 1694 float dx = W * (P1.x / P1.w - P0.x / P0.w); 1695 float dy = H * (P1.y / P1.w - P0.y / P0.w); 1696 1697 if(dx == 0 && dy == 0) 1698 { 1699 return false; 1700 } 1701 1702 if(false) // Rectangle 1703 { 1704 float4 P[4]; 1705 int C[4]; 1706 1707 P[0] = P0; 1708 P[1] = P1; 1709 P[2] = P1; 1710 P[3] = P0; 1711 1712 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy); 1713 1714 dx *= scale; 1715 dy *= scale; 1716 1717 float dx0w = dx * P0.w / W; 1718 float dy0h = dy * P0.w / H; 1719 float dx0h = dx * P0.w / H; 1720 float dy0w = dy * P0.w / W; 1721 1722 float dx1w = dx * P1.w / W; 1723 float dy1h = dy * P1.w / H; 1724 float dx1h = dx * P1.w / H; 1725 float dy1w = dy * P1.w / W; 1726 1727 P[0].x += -dy0w + -dx0w; 1728 P[0].y += -dx0h + +dy0h; 1729 C[0] = clipper->computeClipFlags(P[0]); 1730 1731 P[1].x += -dy1w + +dx1w; 1732 P[1].y += -dx1h + +dy1h; 1733 C[1] = clipper->computeClipFlags(P[1]); 1734 1735 P[2].x += +dy1w + +dx1w; 1736 P[2].y += +dx1h + -dy1h; 1737 C[2] = clipper->computeClipFlags(P[2]); 1738 1739 P[3].x += +dy0w + -dx0w; 1740 P[3].y += +dx0h + +dy0h; 1741 C[3] = clipper->computeClipFlags(P[3]); 1742 1743 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 1744 { 1745 Polygon polygon(P, 4); 1746 1747 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 1748 1749 if(clipFlagsOr != Clipper::CLIP_FINITE) 1750 { 1751 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1752 { 1753 return false; 1754 } 1755 } 1756 1757 return setupRoutine(&primitive, &triangle, &polygon, &data); 1758 } 1759 } 1760 else // Diamond test convention 1761 { 1762 float4 P[8]; 1763 int C[8]; 1764 1765 P[0] = P0; 1766 P[1] = P0; 1767 P[2] = P0; 1768 P[3] = P0; 1769 P[4] = P1; 1770 P[5] = P1; 1771 P[6] = P1; 1772 P[7] = P1; 1773 1774 float dx0 = lineWidth * 0.5f * P0.w / W; 1775 float dy0 = lineWidth * 0.5f * P0.w / H; 1776 1777 float dx1 = lineWidth * 0.5f * P1.w / W; 1778 float dy1 = lineWidth * 0.5f * P1.w / H; 1779 1780 P[0].x += -dx0; 1781 C[0] = clipper->computeClipFlags(P[0]); 1782 1783 P[1].y += +dy0; 1784 C[1] = clipper->computeClipFlags(P[1]); 1785 1786 P[2].x += +dx0; 1787 C[2] = clipper->computeClipFlags(P[2]); 1788 1789 P[3].y += -dy0; 1790 C[3] = clipper->computeClipFlags(P[3]); 1791 1792 P[4].x += -dx1; 1793 C[4] = clipper->computeClipFlags(P[4]); 1794 1795 P[5].y += +dy1; 1796 C[5] = clipper->computeClipFlags(P[5]); 1797 1798 P[6].x += +dx1; 1799 C[6] = clipper->computeClipFlags(P[6]); 1800 1801 P[7].y += -dy1; 1802 C[7] = clipper->computeClipFlags(P[7]); 1803 1804 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE) 1805 { 1806 float4 L[6]; 1807 1808 if(dx > -dy) 1809 { 1810 if(dx > dy) // Right 1811 { 1812 L[0] = P[0]; 1813 L[1] = P[1]; 1814 L[2] = P[5]; 1815 L[3] = P[6]; 1816 L[4] = P[7]; 1817 L[5] = P[3]; 1818 } 1819 else // Down 1820 { 1821 L[0] = P[0]; 1822 L[1] = P[4]; 1823 L[2] = P[5]; 1824 L[3] = P[6]; 1825 L[4] = P[2]; 1826 L[5] = P[3]; 1827 } 1828 } 1829 else 1830 { 1831 if(dx > dy) // Up 1832 { 1833 L[0] = P[0]; 1834 L[1] = P[1]; 1835 L[2] = P[2]; 1836 L[3] = P[6]; 1837 L[4] = P[7]; 1838 L[5] = P[4]; 1839 } 1840 else // Left 1841 { 1842 L[0] = P[1]; 1843 L[1] = P[2]; 1844 L[2] = P[3]; 1845 L[3] = P[7]; 1846 L[4] = P[4]; 1847 L[5] = P[5]; 1848 } 1849 } 1850 1851 Polygon polygon(L, 6); 1852 1853 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags; 1854 1855 if(clipFlagsOr != Clipper::CLIP_FINITE) 1856 { 1857 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1858 { 1859 return false; 1860 } 1861 } 1862 1863 return setupRoutine(&primitive, &triangle, &polygon, &data); 1864 } 1865 } 1866 1867 return false; 1868 } 1869 setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1870 bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 1871 { 1872 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1873 const SetupProcessor::State &state = draw.setupState; 1874 const DrawData &data = *draw.data; 1875 1876 Vertex &v = triangle.v0; 1877 1878 float pSize; 1879 1880 int pts = state.pointSizeRegister; 1881 1882 if(state.pointSizeRegister != Unused) 1883 { 1884 pSize = v.v[pts].y; 1885 } 1886 else 1887 { 1888 pSize = data.point.pointSize[0]; 1889 } 1890 1891 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax); 1892 1893 float4 P[4]; 1894 int C[4]; 1895 1896 int pos = state.positionRegister; 1897 1898 P[0] = v.v[pos]; 1899 P[1] = v.v[pos]; 1900 P[2] = v.v[pos]; 1901 P[3] = v.v[pos]; 1902 1903 const float X = pSize * P[0].w * data.halfPixelX[0]; 1904 const float Y = pSize * P[0].w * data.halfPixelY[0]; 1905 1906 P[0].x -= X; 1907 P[0].y += Y; 1908 C[0] = clipper->computeClipFlags(P[0]); 1909 1910 P[1].x += X; 1911 P[1].y += Y; 1912 C[1] = clipper->computeClipFlags(P[1]); 1913 1914 P[2].x += X; 1915 P[2].y -= Y; 1916 C[2] = clipper->computeClipFlags(P[2]); 1917 1918 P[3].x -= X; 1919 P[3].y -= Y; 1920 C[3] = clipper->computeClipFlags(P[3]); 1921 1922 triangle.v1 = triangle.v0; 1923 triangle.v2 = triangle.v0; 1924 1925 triangle.v1.X += iround(16 * 0.5f * pSize); 1926 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner 1927 1928 Polygon polygon(P, 4); 1929 1930 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 1931 { 1932 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 1933 1934 if(clipFlagsOr != Clipper::CLIP_FINITE) 1935 { 1936 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1937 { 1938 return false; 1939 } 1940 } 1941 1942 return setupRoutine(&primitive, &triangle, &polygon, &data); 1943 } 1944 1945 return false; 1946 } 1947 initializeThreads()1948 void Renderer::initializeThreads() 1949 { 1950 unitCount = ceilPow2(threadCount); 1951 clusterCount = ceilPow2(threadCount); 1952 1953 for(int i = 0; i < unitCount; i++) 1954 { 1955 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle)); 1956 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive)); 1957 } 1958 1959 for(int i = 0; i < threadCount; i++) 1960 { 1961 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask)); 1962 vertexTask[i]->vertexCache.drawCall = -1; 1963 1964 task[i].type = Task::SUSPEND; 1965 1966 resume[i] = new Event(); 1967 suspend[i] = new Event(); 1968 1969 Parameters parameters; 1970 parameters.threadIndex = i; 1971 parameters.renderer = this; 1972 1973 exitThreads = false; 1974 worker[i] = new Thread(threadFunction, ¶meters); 1975 1976 suspend[i]->wait(); 1977 suspend[i]->signal(); 1978 } 1979 } 1980 terminateThreads()1981 void Renderer::terminateThreads() 1982 { 1983 while(threadsAwake != 0) 1984 { 1985 Thread::sleep(1); 1986 } 1987 1988 for(int thread = 0; thread < threadCount; thread++) 1989 { 1990 if(worker[thread]) 1991 { 1992 exitThreads = true; 1993 resume[thread]->signal(); 1994 worker[thread]->join(); 1995 1996 delete worker[thread]; 1997 worker[thread] = 0; 1998 delete resume[thread]; 1999 resume[thread] = 0; 2000 delete suspend[thread]; 2001 suspend[thread] = 0; 2002 } 2003 2004 deallocate(vertexTask[thread]); 2005 vertexTask[thread] = 0; 2006 } 2007 2008 for(int i = 0; i < 16; i++) 2009 { 2010 deallocate(triangleBatch[i]); 2011 triangleBatch[i] = 0; 2012 2013 deallocate(primitiveBatch[i]); 2014 primitiveBatch[i] = 0; 2015 } 2016 } 2017 loadConstants(const VertexShader * vertexShader)2018 void Renderer::loadConstants(const VertexShader *vertexShader) 2019 { 2020 if(!vertexShader) return; 2021 2022 size_t count = vertexShader->getLength(); 2023 2024 for(size_t i = 0; i < count; i++) 2025 { 2026 const Shader::Instruction *instruction = vertexShader->getInstruction(i); 2027 2028 if(instruction->opcode == Shader::OPCODE_DEF) 2029 { 2030 int index = instruction->dst.index; 2031 float value[4]; 2032 2033 value[0] = instruction->src[0].value[0]; 2034 value[1] = instruction->src[0].value[1]; 2035 value[2] = instruction->src[0].value[2]; 2036 value[3] = instruction->src[0].value[3]; 2037 2038 setVertexShaderConstantF(index, value); 2039 } 2040 else if(instruction->opcode == Shader::OPCODE_DEFI) 2041 { 2042 int index = instruction->dst.index; 2043 int integer[4]; 2044 2045 integer[0] = instruction->src[0].integer[0]; 2046 integer[1] = instruction->src[0].integer[1]; 2047 integer[2] = instruction->src[0].integer[2]; 2048 integer[3] = instruction->src[0].integer[3]; 2049 2050 setVertexShaderConstantI(index, integer); 2051 } 2052 else if(instruction->opcode == Shader::OPCODE_DEFB) 2053 { 2054 int index = instruction->dst.index; 2055 int boolean = instruction->src[0].boolean[0]; 2056 2057 setVertexShaderConstantB(index, &boolean); 2058 } 2059 } 2060 } 2061 loadConstants(const PixelShader * pixelShader)2062 void Renderer::loadConstants(const PixelShader *pixelShader) 2063 { 2064 if(!pixelShader) return; 2065 2066 size_t count = pixelShader->getLength(); 2067 2068 for(size_t i = 0; i < count; i++) 2069 { 2070 const Shader::Instruction *instruction = pixelShader->getInstruction(i); 2071 2072 if(instruction->opcode == Shader::OPCODE_DEF) 2073 { 2074 int index = instruction->dst.index; 2075 float value[4]; 2076 2077 value[0] = instruction->src[0].value[0]; 2078 value[1] = instruction->src[0].value[1]; 2079 value[2] = instruction->src[0].value[2]; 2080 value[3] = instruction->src[0].value[3]; 2081 2082 setPixelShaderConstantF(index, value); 2083 } 2084 else if(instruction->opcode == Shader::OPCODE_DEFI) 2085 { 2086 int index = instruction->dst.index; 2087 int integer[4]; 2088 2089 integer[0] = instruction->src[0].integer[0]; 2090 integer[1] = instruction->src[0].integer[1]; 2091 integer[2] = instruction->src[0].integer[2]; 2092 integer[3] = instruction->src[0].integer[3]; 2093 2094 setPixelShaderConstantI(index, integer); 2095 } 2096 else if(instruction->opcode == Shader::OPCODE_DEFB) 2097 { 2098 int index = instruction->dst.index; 2099 int boolean = instruction->src[0].boolean[0]; 2100 2101 setPixelShaderConstantB(index, &boolean); 2102 } 2103 } 2104 } 2105 setIndexBuffer(Resource * indexBuffer)2106 void Renderer::setIndexBuffer(Resource *indexBuffer) 2107 { 2108 context->indexBuffer = indexBuffer; 2109 } 2110 setMultiSampleMask(unsigned int mask)2111 void Renderer::setMultiSampleMask(unsigned int mask) 2112 { 2113 context->sampleMask = mask; 2114 } 2115 setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)2116 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing) 2117 { 2118 sw::transparencyAntialiasing = transparencyAntialiasing; 2119 } 2120 isReadWriteTexture(int sampler)2121 bool Renderer::isReadWriteTexture(int sampler) 2122 { 2123 for(int index = 0; index < RENDERTARGETS; index++) 2124 { 2125 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource()) 2126 { 2127 return true; 2128 } 2129 } 2130 2131 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource()) 2132 { 2133 return true; 2134 } 2135 2136 return false; 2137 } 2138 updateClipper()2139 void Renderer::updateClipper() 2140 { 2141 if(updateClipPlanes) 2142 { 2143 if(VertexProcessor::isFixedFunction()) // User plane in world space 2144 { 2145 const Matrix &scissorWorld = getViewTransform(); 2146 2147 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0]; 2148 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1]; 2149 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2]; 2150 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3]; 2151 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4]; 2152 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5]; 2153 } 2154 else // User plane in clip space 2155 { 2156 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0]; 2157 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1]; 2158 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2]; 2159 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3]; 2160 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4]; 2161 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5]; 2162 } 2163 2164 updateClipPlanes = false; 2165 } 2166 } 2167 setTextureResource(unsigned int sampler,Resource * resource)2168 void Renderer::setTextureResource(unsigned int sampler, Resource *resource) 2169 { 2170 ASSERT(sampler < TOTAL_IMAGE_UNITS); 2171 2172 context->texture[sampler] = resource; 2173 } 2174 setTextureLevel(unsigned int sampler,unsigned int face,unsigned int level,Surface * surface,TextureType type)2175 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type) 2176 { 2177 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS); 2178 2179 context->sampler[sampler].setTextureLevel(face, level, surface, type); 2180 } 2181 setTextureFilter(SamplerType type,int sampler,FilterType textureFilter)2182 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter) 2183 { 2184 if(type == SAMPLER_PIXEL) 2185 { 2186 PixelProcessor::setTextureFilter(sampler, textureFilter); 2187 } 2188 else 2189 { 2190 VertexProcessor::setTextureFilter(sampler, textureFilter); 2191 } 2192 } 2193 setMipmapFilter(SamplerType type,int sampler,MipmapType mipmapFilter)2194 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter) 2195 { 2196 if(type == SAMPLER_PIXEL) 2197 { 2198 PixelProcessor::setMipmapFilter(sampler, mipmapFilter); 2199 } 2200 else 2201 { 2202 VertexProcessor::setMipmapFilter(sampler, mipmapFilter); 2203 } 2204 } 2205 setGatherEnable(SamplerType type,int sampler,bool enable)2206 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable) 2207 { 2208 if(type == SAMPLER_PIXEL) 2209 { 2210 PixelProcessor::setGatherEnable(sampler, enable); 2211 } 2212 else 2213 { 2214 VertexProcessor::setGatherEnable(sampler, enable); 2215 } 2216 } 2217 setAddressingModeU(SamplerType type,int sampler,AddressingMode addressMode)2218 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode) 2219 { 2220 if(type == SAMPLER_PIXEL) 2221 { 2222 PixelProcessor::setAddressingModeU(sampler, addressMode); 2223 } 2224 else 2225 { 2226 VertexProcessor::setAddressingModeU(sampler, addressMode); 2227 } 2228 } 2229 setAddressingModeV(SamplerType type,int sampler,AddressingMode addressMode)2230 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode) 2231 { 2232 if(type == SAMPLER_PIXEL) 2233 { 2234 PixelProcessor::setAddressingModeV(sampler, addressMode); 2235 } 2236 else 2237 { 2238 VertexProcessor::setAddressingModeV(sampler, addressMode); 2239 } 2240 } 2241 setAddressingModeW(SamplerType type,int sampler,AddressingMode addressMode)2242 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode) 2243 { 2244 if(type == SAMPLER_PIXEL) 2245 { 2246 PixelProcessor::setAddressingModeW(sampler, addressMode); 2247 } 2248 else 2249 { 2250 VertexProcessor::setAddressingModeW(sampler, addressMode); 2251 } 2252 } 2253 setReadSRGB(SamplerType type,int sampler,bool sRGB)2254 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB) 2255 { 2256 if(type == SAMPLER_PIXEL) 2257 { 2258 PixelProcessor::setReadSRGB(sampler, sRGB); 2259 } 2260 else 2261 { 2262 VertexProcessor::setReadSRGB(sampler, sRGB); 2263 } 2264 } 2265 setMipmapLOD(SamplerType type,int sampler,float bias)2266 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias) 2267 { 2268 if(type == SAMPLER_PIXEL) 2269 { 2270 PixelProcessor::setMipmapLOD(sampler, bias); 2271 } 2272 else 2273 { 2274 VertexProcessor::setMipmapLOD(sampler, bias); 2275 } 2276 } 2277 setBorderColor(SamplerType type,int sampler,const Color<float> & borderColor)2278 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor) 2279 { 2280 if(type == SAMPLER_PIXEL) 2281 { 2282 PixelProcessor::setBorderColor(sampler, borderColor); 2283 } 2284 else 2285 { 2286 VertexProcessor::setBorderColor(sampler, borderColor); 2287 } 2288 } 2289 setMaxAnisotropy(SamplerType type,int sampler,float maxAnisotropy)2290 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy) 2291 { 2292 if(type == SAMPLER_PIXEL) 2293 { 2294 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2295 } 2296 else 2297 { 2298 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2299 } 2300 } 2301 setSwizzleR(SamplerType type,int sampler,SwizzleType swizzleR)2302 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR) 2303 { 2304 if(type == SAMPLER_PIXEL) 2305 { 2306 PixelProcessor::setSwizzleR(sampler, swizzleR); 2307 } 2308 else 2309 { 2310 VertexProcessor::setSwizzleR(sampler, swizzleR); 2311 } 2312 } 2313 setSwizzleG(SamplerType type,int sampler,SwizzleType swizzleG)2314 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG) 2315 { 2316 if(type == SAMPLER_PIXEL) 2317 { 2318 PixelProcessor::setSwizzleG(sampler, swizzleG); 2319 } 2320 else 2321 { 2322 VertexProcessor::setSwizzleG(sampler, swizzleG); 2323 } 2324 } 2325 setSwizzleB(SamplerType type,int sampler,SwizzleType swizzleB)2326 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB) 2327 { 2328 if(type == SAMPLER_PIXEL) 2329 { 2330 PixelProcessor::setSwizzleB(sampler, swizzleB); 2331 } 2332 else 2333 { 2334 VertexProcessor::setSwizzleB(sampler, swizzleB); 2335 } 2336 } 2337 setSwizzleA(SamplerType type,int sampler,SwizzleType swizzleA)2338 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA) 2339 { 2340 if(type == SAMPLER_PIXEL) 2341 { 2342 PixelProcessor::setSwizzleA(sampler, swizzleA); 2343 } 2344 else 2345 { 2346 VertexProcessor::setSwizzleA(sampler, swizzleA); 2347 } 2348 } 2349 setPointSpriteEnable(bool pointSpriteEnable)2350 void Renderer::setPointSpriteEnable(bool pointSpriteEnable) 2351 { 2352 context->setPointSpriteEnable(pointSpriteEnable); 2353 } 2354 setPointScaleEnable(bool pointScaleEnable)2355 void Renderer::setPointScaleEnable(bool pointScaleEnable) 2356 { 2357 context->setPointScaleEnable(pointScaleEnable); 2358 } 2359 setLineWidth(float width)2360 void Renderer::setLineWidth(float width) 2361 { 2362 context->lineWidth = width; 2363 } 2364 setDepthBias(float bias)2365 void Renderer::setDepthBias(float bias) 2366 { 2367 depthBias = bias; 2368 } 2369 setSlopeDepthBias(float slopeBias)2370 void Renderer::setSlopeDepthBias(float slopeBias) 2371 { 2372 slopeDepthBias = slopeBias; 2373 } 2374 setRasterizerDiscard(bool rasterizerDiscard)2375 void Renderer::setRasterizerDiscard(bool rasterizerDiscard) 2376 { 2377 context->rasterizerDiscard = rasterizerDiscard; 2378 } 2379 setPixelShader(const PixelShader * shader)2380 void Renderer::setPixelShader(const PixelShader *shader) 2381 { 2382 context->pixelShader = shader; 2383 2384 loadConstants(shader); 2385 } 2386 setVertexShader(const VertexShader * shader)2387 void Renderer::setVertexShader(const VertexShader *shader) 2388 { 2389 context->vertexShader = shader; 2390 2391 loadConstants(shader); 2392 } 2393 setPixelShaderConstantF(int index,const float value[4],int count)2394 void Renderer::setPixelShaderConstantF(int index, const float value[4], int count) 2395 { 2396 for(int i = 0; i < DRAW_COUNT; i++) 2397 { 2398 if(drawCall[i]->psDirtyConstF < index + count) 2399 { 2400 drawCall[i]->psDirtyConstF = index + count; 2401 } 2402 } 2403 2404 for(int i = 0; i < count; i++) 2405 { 2406 PixelProcessor::setFloatConstant(index + i, value); 2407 value += 4; 2408 } 2409 } 2410 setPixelShaderConstantI(int index,const int value[4],int count)2411 void Renderer::setPixelShaderConstantI(int index, const int value[4], int count) 2412 { 2413 for(int i = 0; i < DRAW_COUNT; i++) 2414 { 2415 if(drawCall[i]->psDirtyConstI < index + count) 2416 { 2417 drawCall[i]->psDirtyConstI = index + count; 2418 } 2419 } 2420 2421 for(int i = 0; i < count; i++) 2422 { 2423 PixelProcessor::setIntegerConstant(index + i, value); 2424 value += 4; 2425 } 2426 } 2427 setPixelShaderConstantB(int index,const int * boolean,int count)2428 void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count) 2429 { 2430 for(int i = 0; i < DRAW_COUNT; i++) 2431 { 2432 if(drawCall[i]->psDirtyConstB < index + count) 2433 { 2434 drawCall[i]->psDirtyConstB = index + count; 2435 } 2436 } 2437 2438 for(int i = 0; i < count; i++) 2439 { 2440 PixelProcessor::setBooleanConstant(index + i, *boolean); 2441 boolean++; 2442 } 2443 } 2444 setVertexShaderConstantF(int index,const float value[4],int count)2445 void Renderer::setVertexShaderConstantF(int index, const float value[4], int count) 2446 { 2447 for(int i = 0; i < DRAW_COUNT; i++) 2448 { 2449 if(drawCall[i]->vsDirtyConstF < index + count) 2450 { 2451 drawCall[i]->vsDirtyConstF = index + count; 2452 } 2453 } 2454 2455 for(int i = 0; i < count; i++) 2456 { 2457 VertexProcessor::setFloatConstant(index + i, value); 2458 value += 4; 2459 } 2460 } 2461 setVertexShaderConstantI(int index,const int value[4],int count)2462 void Renderer::setVertexShaderConstantI(int index, const int value[4], int count) 2463 { 2464 for(int i = 0; i < DRAW_COUNT; i++) 2465 { 2466 if(drawCall[i]->vsDirtyConstI < index + count) 2467 { 2468 drawCall[i]->vsDirtyConstI = index + count; 2469 } 2470 } 2471 2472 for(int i = 0; i < count; i++) 2473 { 2474 VertexProcessor::setIntegerConstant(index + i, value); 2475 value += 4; 2476 } 2477 } 2478 setVertexShaderConstantB(int index,const int * boolean,int count)2479 void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count) 2480 { 2481 for(int i = 0; i < DRAW_COUNT; i++) 2482 { 2483 if(drawCall[i]->vsDirtyConstB < index + count) 2484 { 2485 drawCall[i]->vsDirtyConstB = index + count; 2486 } 2487 } 2488 2489 for(int i = 0; i < count; i++) 2490 { 2491 VertexProcessor::setBooleanConstant(index + i, *boolean); 2492 boolean++; 2493 } 2494 } 2495 setModelMatrix(const Matrix & M,int i)2496 void Renderer::setModelMatrix(const Matrix &M, int i) 2497 { 2498 VertexProcessor::setModelMatrix(M, i); 2499 } 2500 setViewMatrix(const Matrix & V)2501 void Renderer::setViewMatrix(const Matrix &V) 2502 { 2503 VertexProcessor::setViewMatrix(V); 2504 updateClipPlanes = true; 2505 } 2506 setBaseMatrix(const Matrix & B)2507 void Renderer::setBaseMatrix(const Matrix &B) 2508 { 2509 VertexProcessor::setBaseMatrix(B); 2510 updateClipPlanes = true; 2511 } 2512 setProjectionMatrix(const Matrix & P)2513 void Renderer::setProjectionMatrix(const Matrix &P) 2514 { 2515 VertexProcessor::setProjectionMatrix(P); 2516 updateClipPlanes = true; 2517 } 2518 addQuery(Query * query)2519 void Renderer::addQuery(Query *query) 2520 { 2521 queries.push_back(query); 2522 } 2523 removeQuery(Query * query)2524 void Renderer::removeQuery(Query *query) 2525 { 2526 queries.remove(query); 2527 } 2528 2529 #if PERF_HUD getThreadCount()2530 int Renderer::getThreadCount() 2531 { 2532 return threadCount; 2533 } 2534 getVertexTime(int thread)2535 int64_t Renderer::getVertexTime(int thread) 2536 { 2537 return vertexTime[thread]; 2538 } 2539 getSetupTime(int thread)2540 int64_t Renderer::getSetupTime(int thread) 2541 { 2542 return setupTime[thread]; 2543 } 2544 getPixelTime(int thread)2545 int64_t Renderer::getPixelTime(int thread) 2546 { 2547 return pixelTime[thread]; 2548 } 2549 resetTimers()2550 void Renderer::resetTimers() 2551 { 2552 for(int thread = 0; thread < threadCount; thread++) 2553 { 2554 vertexTime[thread] = 0; 2555 setupTime[thread] = 0; 2556 pixelTime[thread] = 0; 2557 } 2558 } 2559 #endif 2560 setViewport(const Viewport & viewport)2561 void Renderer::setViewport(const Viewport &viewport) 2562 { 2563 this->viewport = viewport; 2564 } 2565 setScissor(const Rect & scissor)2566 void Renderer::setScissor(const Rect &scissor) 2567 { 2568 this->scissor = scissor; 2569 } 2570 setClipFlags(int flags)2571 void Renderer::setClipFlags(int flags) 2572 { 2573 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum 2574 } 2575 setClipPlane(unsigned int index,const float plane[4])2576 void Renderer::setClipPlane(unsigned int index, const float plane[4]) 2577 { 2578 if(index < MAX_CLIP_PLANES) 2579 { 2580 userPlane[index] = plane; 2581 } 2582 else ASSERT(false); 2583 2584 updateClipPlanes = true; 2585 } 2586 updateConfiguration(bool initialUpdate)2587 void Renderer::updateConfiguration(bool initialUpdate) 2588 { 2589 bool newConfiguration = swiftConfig->hasNewConfiguration(); 2590 2591 if(newConfiguration || initialUpdate) 2592 { 2593 terminateThreads(); 2594 2595 SwiftConfig::Configuration configuration = {}; 2596 swiftConfig->getConfiguration(configuration); 2597 2598 precacheVertex = !newConfiguration && configuration.precache; 2599 precacheSetup = !newConfiguration && configuration.precache; 2600 precachePixel = !newConfiguration && configuration.precache; 2601 2602 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize); 2603 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize); 2604 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize); 2605 2606 switch(configuration.textureSampleQuality) 2607 { 2608 case 0: Sampler::setFilterQuality(FILTER_POINT); break; 2609 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break; 2610 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2611 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2612 } 2613 2614 switch(configuration.mipmapQuality) 2615 { 2616 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break; 2617 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2618 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2619 } 2620 2621 setPerspectiveCorrection(configuration.perspectiveCorrection); 2622 2623 switch(configuration.transcendentalPrecision) 2624 { 2625 case 0: 2626 logPrecision = APPROXIMATE; 2627 expPrecision = APPROXIMATE; 2628 rcpPrecision = APPROXIMATE; 2629 rsqPrecision = APPROXIMATE; 2630 break; 2631 case 1: 2632 logPrecision = PARTIAL; 2633 expPrecision = PARTIAL; 2634 rcpPrecision = PARTIAL; 2635 rsqPrecision = PARTIAL; 2636 break; 2637 case 2: 2638 logPrecision = ACCURATE; 2639 expPrecision = ACCURATE; 2640 rcpPrecision = ACCURATE; 2641 rsqPrecision = ACCURATE; 2642 break; 2643 case 3: 2644 logPrecision = WHQL; 2645 expPrecision = WHQL; 2646 rcpPrecision = WHQL; 2647 rsqPrecision = WHQL; 2648 break; 2649 case 4: 2650 logPrecision = IEEE; 2651 expPrecision = IEEE; 2652 rcpPrecision = IEEE; 2653 rsqPrecision = IEEE; 2654 break; 2655 default: 2656 logPrecision = ACCURATE; 2657 expPrecision = ACCURATE; 2658 rcpPrecision = ACCURATE; 2659 rsqPrecision = ACCURATE; 2660 break; 2661 } 2662 2663 switch(configuration.transparencyAntialiasing) 2664 { 2665 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2666 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break; 2667 default: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2668 } 2669 2670 switch(configuration.threadCount) 2671 { 2672 case -1: threadCount = CPUID::coreCount(); break; 2673 case 0: threadCount = CPUID::processAffinity(); break; 2674 default: threadCount = configuration.threadCount; break; 2675 } 2676 2677 CPUID::setEnableSSE4_1(configuration.enableSSE4_1); 2678 CPUID::setEnableSSSE3(configuration.enableSSSE3); 2679 CPUID::setEnableSSE3(configuration.enableSSE3); 2680 CPUID::setEnableSSE2(configuration.enableSSE2); 2681 CPUID::setEnableSSE(configuration.enableSSE); 2682 2683 for(int pass = 0; pass < 10; pass++) 2684 { 2685 optimization[pass] = configuration.optimization[pass]; 2686 } 2687 2688 forceWindowed = configuration.forceWindowed; 2689 complementaryDepthBuffer = configuration.complementaryDepthBuffer; 2690 postBlendSRGB = configuration.postBlendSRGB; 2691 exactColorRounding = configuration.exactColorRounding; 2692 forceClearRegisters = configuration.forceClearRegisters; 2693 2694 #ifndef NDEBUG 2695 minPrimitives = configuration.minPrimitives; 2696 maxPrimitives = configuration.maxPrimitives; 2697 #endif 2698 } 2699 2700 if(!initialUpdate && !worker[0]) 2701 { 2702 initializeThreads(); 2703 } 2704 } 2705 } 2706