1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Renderer.hpp"
16 
17 #include "Clipper.hpp"
18 #include "Surface.hpp"
19 #include "Primitive.hpp"
20 #include "Polygon.hpp"
21 #include "WSI/FrameBuffer.hpp"
22 #include "Device/SwiftConfig.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "Pipeline/Constants.hpp"
25 #include "System/MutexLock.hpp"
26 #include "System/CPUID.hpp"
27 #include "System/Memory.hpp"
28 #include "System/Resource.hpp"
29 #include "System/Half.hpp"
30 #include "System/Math.hpp"
31 #include "System/Timer.hpp"
32 #include "Vulkan/VkDebug.hpp"
33 
34 #undef max
35 
// Passed to the SwiftConfig constructor when a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug-only bounds on the primitive count accepted by Renderer::draw():
// a draw whose count lies outside [minPrimitives, maxPrimitives] returns immediately.
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
42 
43 namespace sw
44 {
	// Convention flags declared here and defined elsewhere. These six are assigned
	// once by setGlobalRenderingSettings() from the Conventions passed to the Renderer.
	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
	extern bool booleanFaceRegister;
	extern bool fullPixelPositionRegister;
	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
	extern bool secondaryColor;             // Specular lighting is applied after texturing
	extern bool colorsDefaultToZero;

	// Further settings declared here and defined elsewhere. exactColorRounding is also
	// assigned by setGlobalRenderingSettings(); complementaryDepthBuffer is read by
	// Renderer::draw() when computing the depth range.
	extern bool forceWindowed;
	extern bool complementaryDepthBuffer;
	extern bool postBlendSRGB;
	extern bool exactColorRounding;
	extern TransparencyAntialiasing transparencyAntialiasing;
	extern bool forceClearRegisters;

	extern bool precacheVertex;
	extern bool precacheSetup;
	extern bool precachePixel;

	// Renderer::draw() divides this by the multisample count to get the per-batch primitive count.
	static const int batchSize = 128;
	// Thread count used by the scheduler (compared against threadsAwake in scheduleTask()).
	AtomicInt threadCount(1);
	// Number of primitive units and pixel clusters iterated by findAvailableTasks().
	AtomicInt Renderer::unitCount(1);
	AtomicInt Renderer::clusterCount(1);

	// Precision settings. threadFunction() enables flush-to-zero and denormals-are-zero
	// on worker threads when logPrecision is below IEEE.
	TranscendentalPrecision logPrecision = ACCURATE;
	TranscendentalPrecision expPrecision = ACCURATE;
	TranscendentalPrecision rcpPrecision = ACCURATE;
	TranscendentalPrecision rsqPrecision = ACCURATE;
	bool perspectiveCorrection = true;
73 
setGlobalRenderingSettings(Conventions conventions,bool exactColorRounding)74 	static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding)
75 	{
76 		static bool initialized = false;
77 
78 		if(!initialized)
79 		{
80 			sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
81 			sw::booleanFaceRegister = conventions.booleanFaceRegister;
82 			sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
83 			sw::leadingVertexFirst = conventions.leadingVertexFirst;
84 			sw::secondaryColor = conventions.secondaryColor;
85 			sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
86 			sw::exactColorRounding = exactColorRounding;
87 			initialized = true;
88 		}
89 	}
90 
	// Argument bundle read by Renderer::threadFunction(), which receives it as a void pointer.
	struct Parameters
	{
		Renderer *renderer;   // Renderer whose threadLoop() the thread runs
		int threadIndex;      // Index passed to threadLoop()
	};
96 
DrawCall()97 	DrawCall::DrawCall()
98 	{
99 		queries = 0;
100 
101 		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
102 		vsDirtyConstI = 16;
103 		vsDirtyConstB = 16;
104 
105 		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
106 		psDirtyConstI = 16;
107 		psDirtyConstB = 16;
108 
109 		references = -1;
110 
111 		data = (DrawData*)allocate(sizeof(DrawData));
112 		data->constants = &constants;
113 	}
114 
~DrawCall()115 	DrawCall::~DrawCall()
116 	{
117 		delete queries;
118 
119 		deallocate(data);
120 	}
121 
Renderer(Context * context,Conventions conventions,bool exactColorRounding)122 	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
123 	{
124 		setGlobalRenderingSettings(conventions, exactColorRounding);
125 
126 		setRenderTarget(0, nullptr);
127 		clipper = new Clipper;
128 		blitter = new Blitter;
129 
130 		updateClipPlanes = true;
131 
132 		#if PERF_HUD
133 			resetTimers();
134 		#endif
135 
136 		for(int i = 0; i < 16; i++)
137 		{
138 			vertexTask[i] = nullptr;
139 
140 			worker[i] = nullptr;
141 			resume[i] = nullptr;
142 			suspend[i] = nullptr;
143 		}
144 
145 		threadsAwake = 0;
146 		resumeApp = new Event();
147 
148 		currentDraw = 0;
149 		nextDraw = 0;
150 
151 		qHead = 0;
152 		qSize = 0;
153 
154 		for(int i = 0; i < 16; i++)
155 		{
156 			triangleBatch[i] = nullptr;
157 			primitiveBatch[i] = nullptr;
158 		}
159 
160 		for(int draw = 0; draw < DRAW_COUNT; draw++)
161 		{
162 			drawCall[draw] = new DrawCall();
163 			drawList[draw] = drawCall[draw];
164 		}
165 
166 		for(int unit = 0; unit < 16; unit++)
167 		{
168 			primitiveProgress[unit].init();
169 		}
170 
171 		for(int cluster = 0; cluster < 16; cluster++)
172 		{
173 			pixelProgress[cluster].init();
174 		}
175 
176 		clipFlags = 0;
177 
178 		swiftConfig = new SwiftConfig(disableServer);
179 		updateConfiguration(true);
180 
181 		sync = new Resource(0);
182 	}
183 
~Renderer()184 	Renderer::~Renderer()
185 	{
186 		sync->destruct();
187 
188 		delete clipper;
189 		clipper = nullptr;
190 
191 		delete blitter;
192 		blitter = nullptr;
193 
194 		terminateThreads();
195 		delete resumeApp;
196 
197 		for(int draw = 0; draw < DRAW_COUNT; draw++)
198 		{
199 			delete drawCall[draw];
200 		}
201 
202 		delete swiftConfig;
203 	}
204 
205 	// This object has to be mem aligned
operator new(size_t size)206 	void* Renderer::operator new(size_t size)
207 	{
208 		ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
209 		return sw::allocate(sizeof(Renderer), 16);
210 	}
211 
	// Counterpart of Renderer::operator new: hands the storage back to sw::deallocate().
	void Renderer::operator delete(void * mem)
	{
		sw::deallocate(mem);
	}
216 
draw(DrawType drawType,unsigned int indexOffset,unsigned int count,bool update)217 	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
218 	{
219 		#ifndef NDEBUG
220 			if(count < minPrimitives || count > maxPrimitives)
221 			{
222 				return;
223 			}
224 		#endif
225 
226 		context->drawType = drawType;
227 
228 		updateConfiguration();
229 		updateClipper();
230 
231 		int ms = context->getMultiSampleCount();
232 		unsigned int oldMultiSampleMask = context->multiSampleMask;
233 		context->multiSampleMask = context->sampleMask & ((unsigned)0xFFFFFFFF >> (32 - ms));
234 
235 		if(!context->multiSampleMask)
236 		{
237 			return;
238 		}
239 
240 		sync->lock(sw::PRIVATE);
241 
242 		if(update || oldMultiSampleMask != context->multiSampleMask)
243 		{
244 			vertexState = VertexProcessor::update(drawType);
245 			setupState = SetupProcessor::update();
246 			pixelState = PixelProcessor::update();
247 
248 			vertexRoutine = VertexProcessor::routine(vertexState);
249 			setupRoutine = SetupProcessor::routine(setupState);
250 			pixelRoutine = PixelProcessor::routine(pixelState);
251 		}
252 
253 		int batch = batchSize / ms;
254 
255 		int (Renderer::*setupPrimitives)(int batch, int count);
256 
257 		if(context->isDrawTriangle())
258 		{
259 			setupPrimitives = &Renderer::setupTriangles;
260 		}
261 		else if(context->isDrawLine())
262 		{
263 			setupPrimitives = &Renderer::setupLines;
264 		}
265 		else   // Point draw
266 		{
267 			setupPrimitives = &Renderer::setupPoints;
268 		}
269 
270 		DrawCall *draw = nullptr;
271 
272 		do
273 		{
274 			for(int i = 0; i < DRAW_COUNT; i++)
275 			{
276 				if(drawCall[i]->references == -1)
277 				{
278 					draw = drawCall[i];
279 					drawList[nextDraw & DRAW_COUNT_BITS] = draw;
280 
281 					break;
282 				}
283 			}
284 
285 			if(!draw)
286 			{
287 				resumeApp->wait();
288 			}
289 		}
290 		while(!draw);
291 
292 		DrawData *data = draw->data;
293 
294 		if(queries.size() != 0)
295 		{
296 			draw->queries = new std::list<Query*>();
297 			bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
298 			for(auto &query : queries)
299 			{
300 				if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
301 				{
302 					++query->reference; // Atomic
303 					draw->queries->push_back(query);
304 				}
305 			}
306 		}
307 
308 		draw->drawType = drawType;
309 		draw->batchSize = batch;
310 
311 		vertexRoutine->bind();
312 		setupRoutine->bind();
313 		pixelRoutine->bind();
314 
315 		draw->vertexRoutine = vertexRoutine;
316 		draw->setupRoutine = setupRoutine;
317 		draw->pixelRoutine = pixelRoutine;
318 		draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
319 		draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
320 		draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
321 		draw->setupPrimitives = setupPrimitives;
322 		draw->setupState = setupState;
323 
324 		for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
325 		{
326 			draw->vertexStream[i] = context->input[i].resource;
327 			data->input[i] = context->input[i].buffer;
328 			data->stride[i] = context->input[i].stride;
329 
330 			if(draw->vertexStream[i])
331 			{
332 				draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
333 			}
334 		}
335 
336 		if(context->indexBuffer)
337 		{
338 			data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
339 		}
340 
341 		draw->indexBuffer = context->indexBuffer;
342 
343 		for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
344 		{
345 			draw->texture[sampler] = 0;
346 		}
347 
348 		for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
349 		{
350 			if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
351 			{
352 				draw->texture[sampler] = context->texture[sampler];
353 				draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
354 
355 				data->mipmap[sampler] = context->sampler[sampler].getTextureData();
356 			}
357 		}
358 
359 		if(context->pixelShader)
360 		{
361 			if(draw->psDirtyConstF)
362 			{
363 				memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
364 				draw->psDirtyConstF = 0;
365 			}
366 
367 			if(draw->psDirtyConstI)
368 			{
369 				memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
370 				draw->psDirtyConstI = 0;
371 			}
372 
373 			if(draw->psDirtyConstB)
374 			{
375 				memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
376 				draw->psDirtyConstB = 0;
377 			}
378 
379 			PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
380 		}
381 		else
382 		{
383 			for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
384 			{
385 				draw->pUniformBuffers[i] = nullptr;
386 			}
387 		}
388 
389 		for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
390 		{
391 			if(vertexState.sampler[sampler].textureType != TEXTURE_NULL)
392 			{
393 				draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
394 				draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
395 
396 				data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
397 			}
398 		}
399 
400 		if(draw->vsDirtyConstF)
401 		{
402 			memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
403 			draw->vsDirtyConstF = 0;
404 		}
405 
406 		if(draw->vsDirtyConstI)
407 		{
408 			memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
409 			draw->vsDirtyConstI = 0;
410 		}
411 
412 		if(draw->vsDirtyConstB)
413 		{
414 			memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
415 			draw->vsDirtyConstB = 0;
416 		}
417 
418 		if(context->vertexShader->isInstanceIdDeclared())
419 		{
420 			data->instanceID = context->instanceID;
421 		}
422 
423 		VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
424 		VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
425 
426 		if(pixelState.stencilActive)
427 		{
428 			data->stencil[0] = stencil;
429 			data->stencil[1] = stencilCCW;
430 		}
431 
432 		if(setupState.isDrawPoint)
433 		{
434 			data->pointSizeMin = pointSizeMin;
435 			data->pointSizeMax = pointSizeMax;
436 		}
437 
438 		data->lineWidth = context->lineWidth;
439 
440 		data->factor = factor;
441 
442 		if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
443 		{
444 			float ref = context->alphaReference * (1.0f / 255.0f);
445 			float margin = sw::min(ref, 1.0f - ref);
446 
447 			if(ms == 4)
448 			{
449 				data->a2c0 = replicate(ref - margin * 0.6f);
450 				data->a2c1 = replicate(ref - margin * 0.2f);
451 				data->a2c2 = replicate(ref + margin * 0.2f);
452 				data->a2c3 = replicate(ref + margin * 0.6f);
453 			}
454 			else if(ms == 2)
455 			{
456 				data->a2c0 = replicate(ref - margin * 0.3f);
457 				data->a2c1 = replicate(ref + margin * 0.3f);
458 			}
459 			else ASSERT(false);
460 		}
461 
462 		if(pixelState.occlusionEnabled)
463 		{
464 			for(int cluster = 0; cluster < clusterCount; cluster++)
465 			{
466 				data->occlusion[cluster] = 0;
467 			}
468 		}
469 
470 		#if PERF_PROFILE
471 			for(int cluster = 0; cluster < clusterCount; cluster++)
472 			{
473 				for(int i = 0; i < PERF_TIMERS; i++)
474 				{
475 					data->cycles[i][cluster] = 0;
476 				}
477 			}
478 		#endif
479 
480 		// Viewport
481 		{
482 			float W = 0.5f * viewport.width;
483 			float H = 0.5f * viewport.height;
484 			float X0 = viewport.x + W;
485 			float Y0 = viewport.y + H;
486 			float N = viewport.minDepth;
487 			float F = viewport.maxDepth;
488 			float Z = F - N;
489 
490 			if(context->isDrawTriangle())
491 			{
492 				N += context->depthBias;
493 			}
494 
495 			if(complementaryDepthBuffer)
496 			{
497 				Z = -Z;
498 				N = 1 - N;
499 			}
500 
501 			data->Wx16 = replicate(W * 16);
502 			data->Hx16 = replicate(H * 16);
503 			data->X0x16 = replicate(X0 * 16 - 8);
504 			data->Y0x16 = replicate(Y0 * 16 - 8);
505 			data->halfPixelX = replicate(0.5f / W);
506 			data->halfPixelY = replicate(0.5f / H);
507 			data->viewportHeight = abs(viewport.height);
508 			data->slopeDepthBias = context->slopeDepthBias;
509 			data->depthRange = Z;
510 			data->depthNear = N;
511 			draw->clipFlags = clipFlags;
512 
513 			if(clipFlags)
514 			{
515 				if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
516 				if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
517 				if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
518 				if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
519 				if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
520 				if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
521 			}
522 		}
523 
524 		// Target
525 		{
526 			for(int index = 0; index < RENDERTARGETS; index++)
527 			{
528 				draw->renderTarget[index] = context->renderTarget[index];
529 
530 				if(draw->renderTarget[index])
531 				{
532 					unsigned int layer = context->renderTargetLayer[index];
533 					data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
534 					data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
535 					data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
536 				}
537 			}
538 
539 			draw->depthBuffer = context->depthBuffer;
540 			draw->stencilBuffer = context->stencilBuffer;
541 
542 			if(draw->depthBuffer)
543 			{
544 				unsigned int layer = context->depthBufferLayer;
545 				data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
546 				data->depthPitchB = context->depthBuffer->getInternalPitchB();
547 				data->depthSliceB = context->depthBuffer->getInternalSliceB();
548 			}
549 
550 			if(draw->stencilBuffer)
551 			{
552 				unsigned int layer = context->stencilBufferLayer;
553 				data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED);
554 				data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
555 				data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
556 			}
557 		}
558 
559 		// Scissor
560 		{
561 			data->scissorX0 = scissor.x0;
562 			data->scissorX1 = scissor.x1;
563 			data->scissorY0 = scissor.y0;
564 			data->scissorY1 = scissor.y1;
565 		}
566 
567 		draw->primitive = 0;
568 		draw->count = count;
569 
570 		draw->references = (count + batch - 1) / batch;
571 
572 		schedulerMutex.lock();
573 		++nextDraw; // Atomic
574 		schedulerMutex.unlock();
575 
576 		#ifndef NDEBUG
577 		if(threadCount == 1)   // Use main thread for draw execution
578 		{
579 			threadsAwake = 1;
580 			task[0].type = Task::RESUME;
581 
582 			taskLoop(0);
583 		}
584 		else
585 		#endif
586 		{
587 			if(!threadsAwake)
588 			{
589 				suspend[0]->wait();
590 
591 				threadsAwake = 1;
592 				task[0].type = Task::RESUME;
593 
594 				resume[0]->signal();
595 			}
596 		}
597 	}
598 
	// Clears 'clearRect' of 'dest' by forwarding all arguments unchanged to the blitter.
	void Renderer::clear(void *value, VkFormat format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
	{
		blitter->clear(value, format, dest, clearRect, rgbaMask);
	}
603 
	// Copies 'sRect' of 'source' into 'dRect' of 'dest' through the blitter. The three
	// flags are packed, in this order, into the options argument of Blitter::blit().
	void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion)
	{
		blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion});
	}
608 
	// Forwards a 3D surface copy from 'source' to 'dest' to the blitter.
	void Renderer::blit3D(Surface *source, Surface *dest)
	{
		blitter->blit3D(source, dest);
	}
613 
threadFunction(void * parameters)614 	void Renderer::threadFunction(void *parameters)
615 	{
616 		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
617 		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
618 
619 		if(logPrecision < IEEE)
620 		{
621 			CPUID::setFlushToZero(true);
622 			CPUID::setDenormalsAreZero(true);
623 		}
624 
625 		renderer->threadLoop(threadIndex);
626 	}
627 
threadLoop(int threadIndex)628 	void Renderer::threadLoop(int threadIndex)
629 	{
630 		while(!exitThreads)
631 		{
632 			taskLoop(threadIndex);
633 
634 			suspend[threadIndex]->signal();
635 			resume[threadIndex]->wait();
636 		}
637 	}
638 
taskLoop(int threadIndex)639 	void Renderer::taskLoop(int threadIndex)
640 	{
641 		while(task[threadIndex].type != Task::SUSPEND)
642 		{
643 			scheduleTask(threadIndex);
644 			executeTask(threadIndex);
645 		}
646 	}
647 
findAvailableTasks()648 	void Renderer::findAvailableTasks()
649 	{
650 		// Find pixel tasks
651 		for(int cluster = 0; cluster < clusterCount; cluster++)
652 		{
653 			if(!pixelProgress[cluster].executing)
654 			{
655 				for(int unit = 0; unit < unitCount; unit++)
656 				{
657 					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
658 					{
659 						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
660 						{
661 							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
662 							{
663 								Task &task = taskQueue[qHead];
664 								task.type = Task::PIXELS;
665 								task.primitiveUnit = unit;
666 								task.pixelCluster = cluster;
667 
668 								pixelProgress[cluster].executing = true;
669 
670 								// Commit to the task queue
671 								qHead = (qHead + 1) & TASK_COUNT_BITS;
672 								qSize++;
673 
674 								break;
675 							}
676 						}
677 					}
678 				}
679 			}
680 		}
681 
682 		// Find primitive tasks
683 		if(currentDraw == nextDraw)
684 		{
685 			return;   // No more primitives to process
686 		}
687 
688 		for(int unit = 0; unit < unitCount; unit++)
689 		{
690 			DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS];
691 
692 			int primitive = draw->primitive;
693 			int count = draw->count;
694 
695 			if(primitive >= count)
696 			{
697 				++currentDraw; // Atomic
698 
699 				if(currentDraw == nextDraw)
700 				{
701 					return;   // No more primitives to process
702 				}
703 
704 				draw = drawList[currentDraw & DRAW_COUNT_BITS];
705 			}
706 
707 			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
708 			{
709 				primitive = draw->primitive;
710 				count = draw->count;
711 				int batch = draw->batchSize;
712 
713 				primitiveProgress[unit].drawCall = currentDraw;
714 				primitiveProgress[unit].firstPrimitive = primitive;
715 				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
716 
717 				draw->primitive += batch;
718 
719 				Task &task = taskQueue[qHead];
720 				task.type = Task::PRIMITIVES;
721 				task.primitiveUnit = unit;
722 
723 				primitiveProgress[unit].references = -1;
724 
725 				// Commit to the task queue
726 				qHead = (qHead + 1) & TASK_COUNT_BITS;
727 				qSize++;
728 			}
729 		}
730 	}
731 
scheduleTask(int threadIndex)732 	void Renderer::scheduleTask(int threadIndex)
733 	{
734 		schedulerMutex.lock();
735 
736 		int curThreadsAwake = threadsAwake;
737 
738 		if((int)qSize < threadCount - curThreadsAwake + 1)
739 		{
740 			findAvailableTasks();
741 		}
742 
743 		if(qSize != 0)
744 		{
745 			task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS];
746 			qSize--;
747 
748 			if(curThreadsAwake != threadCount)
749 			{
750 				int wakeup = qSize - curThreadsAwake + 1;
751 
752 				for(int i = 0; i < threadCount && wakeup > 0; i++)
753 				{
754 					if(task[i].type == Task::SUSPEND)
755 					{
756 						suspend[i]->wait();
757 						task[i].type = Task::RESUME;
758 						resume[i]->signal();
759 
760 						++threadsAwake; // Atomic
761 						wakeup--;
762 					}
763 				}
764 			}
765 		}
766 		else
767 		{
768 			task[threadIndex].type = Task::SUSPEND;
769 
770 			--threadsAwake; // Atomic
771 		}
772 
773 		schedulerMutex.unlock();
774 	}
775 
executeTask(int threadIndex)776 	void Renderer::executeTask(int threadIndex)
777 	{
778 		#if PERF_HUD
779 			int64_t startTick = Timer::ticks();
780 		#endif
781 
782 		switch(task[threadIndex].type)
783 		{
784 		case Task::PRIMITIVES:
785 			{
786 				int unit = task[threadIndex].primitiveUnit;
787 
788 				int input = primitiveProgress[unit].firstPrimitive;
789 				int count = primitiveProgress[unit].primitiveCount;
790 				DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
791 				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
792 
793 				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
794 
795 				#if PERF_HUD
796 					int64_t time = Timer::ticks();
797 					vertexTime[threadIndex] += time - startTick;
798 					startTick = time;
799 				#endif
800 
801 				int visible = 0;
802 
803 				if(!draw->setupState.rasterizerDiscard)
804 				{
805 					visible = (this->*setupPrimitives)(unit, count);
806 				}
807 
808 				primitiveProgress[unit].visible = visible;
809 				primitiveProgress[unit].references = clusterCount;
810 
811 				#if PERF_HUD
812 					setupTime[threadIndex] += Timer::ticks() - startTick;
813 				#endif
814 			}
815 			break;
816 		case Task::PIXELS:
817 			{
818 				int unit = task[threadIndex].primitiveUnit;
819 				int visible = primitiveProgress[unit].visible;
820 
821 				if(visible > 0)
822 				{
823 					int cluster = task[threadIndex].pixelCluster;
824 					Primitive *primitive = primitiveBatch[unit];
825 					DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS];
826 					DrawData *data = draw->data;
827 					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
828 
829 					pixelRoutine(primitive, visible, cluster, data);
830 				}
831 
832 				finishRendering(task[threadIndex]);
833 
834 				#if PERF_HUD
835 					pixelTime[threadIndex] += Timer::ticks() - startTick;
836 				#endif
837 			}
838 			break;
839 		case Task::RESUME:
840 			break;
841 		case Task::SUSPEND:
842 			break;
843 		default:
844 			ASSERT(false);
845 		}
846 	}
847 
	// Acquires and immediately releases a PUBLIC lock on 'sync'. draw() takes a PRIVATE
	// lock on the same resource and finishRendering() releases it when a draw completes,
	// so this presumably blocks until outstanding draws are done.
	// NOTE(review): relies on Resource lock semantics not visible here - confirm.
	void Renderer::synchronize()
	{
		sync->lock(sw::PUBLIC);
		sync->unlock();
	}
853 
	// Bookkeeping after a pixel task: advances the cluster's progress, drops the cluster's
	// reference on the primitive unit and, when the whole draw call is done, accumulates
	// query results, unlocks every resource draw() locked, unbinds the routines, releases
	// the 'sync' lock and returns the DrawCall slot to the pool.
	void Renderer::finishRendering(Task &pixelTask)
	{
		int unit = pixelTask.primitiveUnit;
		int cluster = pixelTask.pixelCluster;

		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
		DrawData &data = *draw.data;
		int primitive = primitiveProgress[unit].firstPrimitive;
		int count = primitiveProgress[unit].primitiveCount;
		int processedPrimitives = primitive + count;

		pixelProgress[cluster].processedPrimitives = processedPrimitives;

		// This cluster has rendered the last batch of the draw: move it on to the next draw call
		if(pixelProgress[cluster].processedPrimitives >= draw.count)
		{
			++pixelProgress[cluster].drawCall; // Atomic
			pixelProgress[cluster].processedPrimitives = 0;
		}

		// NOTE(review): the == 0 tests below treat the result of the post-decrement as the
		// value after decrementing; this depends on the counter type's operator--(int) - confirm.
		int ref = primitiveProgress[unit].references--; // Atomic

		if(ref == 0)
		{
			// Last cluster done with this unit's batch: drop the batch's reference on the draw
			ref = draw.references--; // Atomic

			if(ref == 0)
			{
				// Last batch of the draw: finalize it
				#if PERF_PROFILE
					for(int cluster = 0; cluster < clusterCount; cluster++)
					{
						for(int i = 0; i < PERF_TIMERS; i++)
						{
							profiler.cycles[i] += data.cycles[i][cluster];
						}
					}
				#endif

				if(draw.queries)
				{
					for(auto &query : *(draw.queries))
					{
						switch(query->type)
						{
						case Query::FRAGMENTS_PASSED:
							// Sum the per-cluster occlusion counters
							for(int cluster = 0; cluster < clusterCount; cluster++)
							{
								query->data += data.occlusion[cluster];
							}
							break;
						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
							query->data += processedPrimitives;
							break;
						default:
							break;
						}

						// Drop the reference taken in draw()
						--query->reference; // Atomic
					}

					delete draw.queries;
					draw.queries = 0;
				}

				// Release everything draw() locked
				for(int i = 0; i < RENDERTARGETS; i++)
				{
					if(draw.renderTarget[i])
					{
						draw.renderTarget[i]->unlockInternal();
					}
				}

				if(draw.depthBuffer)
				{
					draw.depthBuffer->unlockInternal();
				}

				if(draw.stencilBuffer)
				{
					draw.stencilBuffer->unlockStencil();
				}

				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
				{
					if(draw.texture[i])
					{
						draw.texture[i]->unlock();
					}
				}

				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
				{
					if(draw.vertexStream[i])
					{
						draw.vertexStream[i]->unlock();
					}
				}

				if(draw.indexBuffer)
				{
					draw.indexBuffer->unlock();
				}

				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
				{
					if(draw.pUniformBuffers[i])
					{
						draw.pUniformBuffers[i]->unlock();
					}
					if(draw.vUniformBuffers[i])
					{
						draw.vUniformBuffers[i]->unlock();
					}
				}

				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
				{
					if(draw.transformFeedbackBuffers[i])
					{
						draw.transformFeedbackBuffers[i]->unlock();
					}
				}

				draw.vertexRoutine->unbind();
				draw.setupRoutine->unbind();
				draw.pixelRoutine->unbind();

				// Matches the sync->lock() taken in draw()
				sync->unlock();

				// Mark the slot free and wake draw() if it is waiting for one
				draw.references = -1;
				resumeApp->signal();
			}
		}

		// The cluster may be given a new pixel task again
		pixelProgress[cluster].executing = false;
	}
989 
processPrimitiveVertices(int unit,unsigned int start,unsigned int triangleCount,unsigned int loop,int thread)990 	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
991 	{
992 		Triangle *triangle = triangleBatch[unit];
993 		int primitiveDrawCall = primitiveProgress[unit].drawCall;
994 		DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS];
995 		DrawData *data = draw->data;
996 		VertexTask *task = vertexTask[thread];
997 
998 		const void *indices = data->indices;
999 		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1000 
1001 		if(task->vertexCache.drawCall != primitiveDrawCall)
1002 		{
1003 			task->vertexCache.clear();
1004 			task->vertexCache.drawCall = primitiveDrawCall;
1005 		}
1006 
1007 		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1008 
1009 		switch(draw->drawType)
1010 		{
1011 		case DRAW_POINTLIST:
1012 			{
1013 				unsigned int index = start;
1014 
1015 				for(unsigned int i = 0; i < triangleCount; i++)
1016 				{
1017 					batch[i][0] = index;
1018 					batch[i][1] = index;
1019 					batch[i][2] = index;
1020 
1021 					index += 1;
1022 				}
1023 			}
1024 			break;
1025 		case DRAW_LINELIST:
1026 			{
1027 				unsigned int index = 2 * start;
1028 
1029 				for(unsigned int i = 0; i < triangleCount; i++)
1030 				{
1031 					batch[i][0] = index + 0;
1032 					batch[i][1] = index + 1;
1033 					batch[i][2] = index + 1;
1034 
1035 					index += 2;
1036 				}
1037 			}
1038 			break;
1039 		case DRAW_LINESTRIP:
1040 			{
1041 				unsigned int index = start;
1042 
1043 				for(unsigned int i = 0; i < triangleCount; i++)
1044 				{
1045 					batch[i][0] = index + 0;
1046 					batch[i][1] = index + 1;
1047 					batch[i][2] = index + 1;
1048 
1049 					index += 1;
1050 				}
1051 			}
1052 			break;
1053 		case DRAW_TRIANGLELIST:
1054 			{
1055 				unsigned int index = 3 * start;
1056 
1057 				for(unsigned int i = 0; i < triangleCount; i++)
1058 				{
1059 					batch[i][0] = index + 0;
1060 					batch[i][1] = index + 1;
1061 					batch[i][2] = index + 2;
1062 
1063 					index += 3;
1064 				}
1065 			}
1066 			break;
1067 		case DRAW_TRIANGLESTRIP:
1068 			{
1069 				unsigned int index = start;
1070 
1071 				for(unsigned int i = 0; i < triangleCount; i++)
1072 				{
1073 					if(leadingVertexFirst)
1074 					{
1075 						batch[i][0] = index + 0;
1076 						batch[i][1] = index + (index & 1) + 1;
1077 						batch[i][2] = index + (~index & 1) + 1;
1078 					}
1079 					else
1080 					{
1081 						batch[i][0] = index + (index & 1);
1082 						batch[i][1] = index + (~index & 1);
1083 						batch[i][2] = index + 2;
1084 					}
1085 
1086 					index += 1;
1087 				}
1088 			}
1089 			break;
1090 		case DRAW_TRIANGLEFAN:
1091 			{
1092 				unsigned int index = start;
1093 
1094 				for(unsigned int i = 0; i < triangleCount; i++)
1095 				{
1096 					if(leadingVertexFirst)
1097 					{
1098 						batch[i][0] = index + 1;
1099 						batch[i][1] = index + 2;
1100 						batch[i][2] = 0;
1101 					}
1102 					else
1103 					{
1104 						batch[i][0] = 0;
1105 						batch[i][1] = index + 1;
1106 						batch[i][2] = index + 2;
1107 					}
1108 
1109 					index += 1;
1110 				}
1111 			}
1112 			break;
1113 		case DRAW_INDEXEDPOINTLIST16:
1114 			{
1115 				const unsigned short *index = (const unsigned short*)indices + start;
1116 
1117 				for(unsigned int i = 0; i < triangleCount; i++)
1118 				{
1119 					batch[i][0] = *index;
1120 					batch[i][1] = *index;
1121 					batch[i][2] = *index;
1122 
1123 					index += 1;
1124 				}
1125 			}
1126 			break;
1127 		case DRAW_INDEXEDPOINTLIST32:
1128 			{
1129 				const unsigned int *index = (const unsigned int*)indices + start;
1130 
1131 				for(unsigned int i = 0; i < triangleCount; i++)
1132 				{
1133 					batch[i][0] = *index;
1134 					batch[i][1] = *index;
1135 					batch[i][2] = *index;
1136 
1137 					index += 1;
1138 				}
1139 			}
1140 			break;
1141 		case DRAW_INDEXEDLINELIST16:
1142 			{
1143 				const unsigned short *index = (const unsigned short*)indices + 2 * start;
1144 
1145 				for(unsigned int i = 0; i < triangleCount; i++)
1146 				{
1147 					batch[i][0] = index[0];
1148 					batch[i][1] = index[1];
1149 					batch[i][2] = index[1];
1150 
1151 					index += 2;
1152 				}
1153 			}
1154 			break;
1155 		case DRAW_INDEXEDLINELIST32:
1156 			{
1157 				const unsigned int *index = (const unsigned int*)indices + 2 * start;
1158 
1159 				for(unsigned int i = 0; i < triangleCount; i++)
1160 				{
1161 					batch[i][0] = index[0];
1162 					batch[i][1] = index[1];
1163 					batch[i][2] = index[1];
1164 
1165 					index += 2;
1166 				}
1167 			}
1168 			break;
1169 		case DRAW_INDEXEDLINESTRIP16:
1170 			{
1171 				const unsigned short *index = (const unsigned short*)indices + start;
1172 
1173 				for(unsigned int i = 0; i < triangleCount; i++)
1174 				{
1175 					batch[i][0] = index[0];
1176 					batch[i][1] = index[1];
1177 					batch[i][2] = index[1];
1178 
1179 					index += 1;
1180 				}
1181 			}
1182 			break;
1183 		case DRAW_INDEXEDLINESTRIP32:
1184 			{
1185 				const unsigned int *index = (const unsigned int*)indices + start;
1186 
1187 				for(unsigned int i = 0; i < triangleCount; i++)
1188 				{
1189 					batch[i][0] = index[0];
1190 					batch[i][1] = index[1];
1191 					batch[i][2] = index[1];
1192 
1193 					index += 1;
1194 				}
1195 			}
1196 			break;
1197 		case DRAW_INDEXEDTRIANGLELIST16:
1198 			{
1199 				const unsigned short *index = (const unsigned short*)indices + 3 * start;
1200 
1201 				for(unsigned int i = 0; i < triangleCount; i++)
1202 				{
1203 					batch[i][0] = index[0];
1204 					batch[i][1] = index[1];
1205 					batch[i][2] = index[2];
1206 
1207 					index += 3;
1208 				}
1209 			}
1210 			break;
1211 		case DRAW_INDEXEDTRIANGLELIST32:
1212 			{
1213 				const unsigned int *index = (const unsigned int*)indices + 3 * start;
1214 
1215 				for(unsigned int i = 0; i < triangleCount; i++)
1216 				{
1217 					batch[i][0] = index[0];
1218 					batch[i][1] = index[1];
1219 					batch[i][2] = index[2];
1220 
1221 					index += 3;
1222 				}
1223 			}
1224 			break;
1225 		case DRAW_INDEXEDTRIANGLESTRIP16:
1226 			{
1227 				const unsigned short *index = (const unsigned short*)indices + start;
1228 
1229 				for(unsigned int i = 0; i < triangleCount; i++)
1230 				{
1231 					batch[i][0] = index[0];
1232 					batch[i][1] = index[((start + i) & 1) + 1];
1233 					batch[i][2] = index[(~(start + i) & 1) + 1];
1234 
1235 					index += 1;
1236 				}
1237 			}
1238 			break;
1239 		case DRAW_INDEXEDTRIANGLESTRIP32:
1240 			{
1241 				const unsigned int *index = (const unsigned int*)indices + start;
1242 
1243 				for(unsigned int i = 0; i < triangleCount; i++)
1244 				{
1245 					batch[i][0] = index[0];
1246 					batch[i][1] = index[((start + i) & 1) + 1];
1247 					batch[i][2] = index[(~(start + i) & 1) + 1];
1248 
1249 					index += 1;
1250 				}
1251 			}
1252 			break;
1253 		case DRAW_INDEXEDTRIANGLEFAN16:
1254 			{
1255 				const unsigned short *index = (const unsigned short*)indices;
1256 
1257 				for(unsigned int i = 0; i < triangleCount; i++)
1258 				{
1259 					batch[i][0] = index[start + i + 1];
1260 					batch[i][1] = index[start + i + 2];
1261 					batch[i][2] = index[0];
1262 				}
1263 			}
1264 			break;
1265 		case DRAW_INDEXEDTRIANGLEFAN32:
1266 			{
1267 				const unsigned int *index = (const unsigned int*)indices;
1268 
1269 				for(unsigned int i = 0; i < triangleCount; i++)
1270 				{
1271 					batch[i][0] = index[start + i + 1];
1272 					batch[i][1] = index[start + i + 2];
1273 					batch[i][2] = index[0];
1274 				}
1275 			}
1276 			break;
1277 		default:
1278 			ASSERT(false);
1279 			return;
1280 		}
1281 
1282 		task->primitiveStart = start;
1283 		task->vertexCount = triangleCount * 3;
1284 		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1285 	}
1286 
setupTriangles(int unit,int count)1287 	int Renderer::setupTriangles(int unit, int count)
1288 	{
1289 		Triangle *triangle = triangleBatch[unit];
1290 		Primitive *primitive = primitiveBatch[unit];
1291 
1292 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1293 		SetupProcessor::State &state = draw.setupState;
1294 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1295 
1296 		int ms = state.multiSample;
1297 		int pos = state.positionRegister;
1298 		const DrawData *data = draw.data;
1299 		int visible = 0;
1300 
1301 		for(int i = 0; i < count; i++, triangle++)
1302 		{
1303 			Vertex &v0 = triangle->v0;
1304 			Vertex &v1 = triangle->v1;
1305 			Vertex &v2 = triangle->v2;
1306 
1307 			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1308 			{
1309 				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1310 
1311 				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1312 
1313 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1314 				{
1315 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1316 					{
1317 						continue;
1318 					}
1319 				}
1320 
1321 				if(setupRoutine(primitive, triangle, &polygon, data))
1322 				{
1323 					primitive += ms;
1324 					visible++;
1325 				}
1326 			}
1327 		}
1328 
1329 		return visible;
1330 	}
1331 
setupLines(int unit,int count)1332 	int Renderer::setupLines(int unit, int count)
1333 	{
1334 		Triangle *triangle = triangleBatch[unit];
1335 		Primitive *primitive = primitiveBatch[unit];
1336 		int visible = 0;
1337 
1338 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1339 		SetupProcessor::State &state = draw.setupState;
1340 
1341 		int ms = state.multiSample;
1342 
1343 		for(int i = 0; i < count; i++)
1344 		{
1345 			if(setupLine(*primitive, *triangle, draw))
1346 			{
1347 				primitive += ms;
1348 				visible++;
1349 			}
1350 
1351 			triangle++;
1352 		}
1353 
1354 		return visible;
1355 	}
1356 
setupPoints(int unit,int count)1357 	int Renderer::setupPoints(int unit, int count)
1358 	{
1359 		Triangle *triangle = triangleBatch[unit];
1360 		Primitive *primitive = primitiveBatch[unit];
1361 		int visible = 0;
1362 
1363 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1364 		SetupProcessor::State &state = draw.setupState;
1365 
1366 		int ms = state.multiSample;
1367 
1368 		for(int i = 0; i < count; i++)
1369 		{
1370 			if(setupPoint(*primitive, *triangle, draw))
1371 			{
1372 				primitive += ms;
1373 				visible++;
1374 			}
1375 
1376 			triangle++;
1377 		}
1378 
1379 		return visible;
1380 	}
1381 
	// Expands the line from triangle.v0 to triangle.v1 into a polygon and runs the
	// setup routine on it.
	//
	// With multisampling (state.multiSample > 1) the line becomes a four-vertex
	// rectangle of width data.lineWidth; otherwise it becomes a six-vertex polygon
	// built from diamonds around both endpoints.
	//
	// Returns false when both endpoints have w <= 0, when the line has zero
	// length after projection, when the AND of all corner clip flags differs from
	// Clipper::CLIP_FINITE, or when clipping rejects the polygon. Otherwise
	// returns the result of the setup routine.
	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
	{
		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
		const SetupProcessor::State &state = draw.setupState;
		const DrawData &data = *draw.data;

		float lineWidth = data.lineWidth;

		Vertex &v0 = triangle.v0;
		Vertex &v1 = triangle.v1;

		int pos = state.positionRegister;

		const float4 &P0 = v0.v[pos];
		const float4 &P1 = v1.v[pos];

		// NOTE(review): only the case where both w values are non-positive is
		// rejected; a single endpoint with w == 0 still reaches the divisions
		// below. Confirm whether earlier stages rule that out.
		if(P0.w <= 0 && P1.w <= 0)
		{
			return false;
		}

		// Wx16/Hx16 are presumably the viewport half-extents scaled by 16 - confirm
		const float W = data.Wx16[0] * (1.0f / 16.0f);
		const float H = data.Hx16[0] * (1.0f / 16.0f);

		// Difference between the endpoints' x/w and y/w, scaled by W and H
		float dx = W * (P1.x / P1.w - P0.x / P0.w);
		float dy = H * (P1.y / P1.w - P0.y / P0.w);

		// Zero-length line
		if(dx == 0 && dy == 0)
		{
			return false;
		}

		if(state.multiSample > 1)   // Rectangle
		{
			float4 P[4];
			int C[4];

			P[0] = P0;
			P[1] = P1;
			P[2] = P1;
			P[3] = P0;

			// Rescale (dx, dy) to half the line width in length
			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);

			dx *= scale;
			dy *= scale;

			// Offsets scaled by each endpoint's w and divided by H or W
			float dx0h = dx * P0.w / H;
			float dy0w = dy * P0.w / W;

			float dx1h = dx * P1.w / H;
			float dy1w = dy * P1.w / W;

			// Each endpoint is offset along (-dy, +dx) and along (+dy, -dx)
			P[0].x += -dy0w;
			P[0].y += +dx0h;
			C[0] = clipper->computeClipFlags(P[0]);

			P[1].x += -dy1w;
			P[1].y += +dx1h;
			C[1] = clipper->computeClipFlags(P[1]);

			P[2].x += +dy1w;
			P[2].y += -dx1h;
			C[2] = clipper->computeClipFlags(P[2]);

			P[3].x += +dy0w;
			P[3].y += -dx0h;
			C[3] = clipper->computeClipFlags(P[3]);

			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
			{
				Polygon polygon(P, 4);

				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;

				// Only invoke the clipper when a flag beyond CLIP_FINITE is set
				if(clipFlagsOr != Clipper::CLIP_FINITE)
				{
					if(!clipper->clip(polygon, clipFlagsOr, draw))
					{
						return false;
					}
				}

				return setupRoutine(&primitive, &triangle, &polygon, &data);
			}
		}
		else   // Diamond test convention
		{
			float4 P[8];
			int C[8];

			// P[0..3] are the diamond corners around P0, P[4..7] those around P1
			P[0] = P0;
			P[1] = P0;
			P[2] = P0;
			P[3] = P0;
			P[4] = P1;
			P[5] = P1;
			P[6] = P1;
			P[7] = P1;

			// Half the line width, scaled by each endpoint's w and divided by W or H
			float dx0 = lineWidth * 0.5f * P0.w / W;
			float dy0 = lineWidth * 0.5f * P0.w / H;

			float dx1 = lineWidth * 0.5f * P1.w / W;
			float dy1 = lineWidth * 0.5f * P1.w / H;

			// Corner order per endpoint: -x, +y, +x, -y
			P[0].x += -dx0;
			C[0] = clipper->computeClipFlags(P[0]);

			P[1].y += +dy0;
			C[1] = clipper->computeClipFlags(P[1]);

			P[2].x += +dx0;
			C[2] = clipper->computeClipFlags(P[2]);

			P[3].y += -dy0;
			C[3] = clipper->computeClipFlags(P[3]);

			P[4].x += -dx1;
			C[4] = clipper->computeClipFlags(P[4]);

			P[5].y += +dy1;
			C[5] = clipper->computeClipFlags(P[5]);

			P[6].x += +dx1;
			C[6] = clipper->computeClipFlags(P[6]);

			P[7].y += -dy1;
			C[7] = clipper->computeClipFlags(P[7]);

			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
			{
				// Six of the eight diamond corners, selected by the line's direction
				float4 L[6];

				if(dx > -dy)
				{
					if(dx > dy)   // Right
					{
						L[0] = P[0];
						L[1] = P[1];
						L[2] = P[5];
						L[3] = P[6];
						L[4] = P[7];
						L[5] = P[3];
					}
					else   // Down
					{
						L[0] = P[0];
						L[1] = P[4];
						L[2] = P[5];
						L[3] = P[6];
						L[4] = P[2];
						L[5] = P[3];
					}
				}
				else
				{
					if(dx > dy)   // Up
					{
						L[0] = P[0];
						L[1] = P[1];
						L[2] = P[2];
						L[3] = P[6];
						L[4] = P[7];
						L[5] = P[4];
					}
					else   // Left
					{
						L[0] = P[1];
						L[1] = P[2];
						L[2] = P[3];
						L[3] = P[7];
						L[4] = P[4];
						L[5] = P[5];
					}
				}

				Polygon polygon(L, 6);

				// Clip flags of all eight corners are combined, not only the six used
				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;

				if(clipFlagsOr != Clipper::CLIP_FINITE)
				{
					if(!clipper->clip(polygon, clipFlagsOr, draw))
					{
						return false;
					}
				}

				return setupRoutine(&primitive, &triangle, &polygon, &data);
			}
		}

		// Reached when the combined clip flag test above failed
		return false;
	}
1577 
setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1578 	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1579 	{
1580 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1581 		const SetupProcessor::State &state = draw.setupState;
1582 		const DrawData &data = *draw.data;
1583 
1584 		Vertex &v = triangle.v0;
1585 
1586 		float pSize;
1587 
1588 		int pts = state.pointSizeRegister;
1589 
1590 		if(state.pointSizeRegister != Unused)
1591 		{
1592 			pSize = v.v[pts].y;
1593 		}
1594 		else
1595 		{
1596 			pSize = 1.0f;
1597 		}
1598 
1599 		pSize = clamp(pSize, data.pointSizeMin, data.pointSizeMax);
1600 
1601 		float4 P[4];
1602 		int C[4];
1603 
1604 		int pos = state.positionRegister;
1605 
1606 		P[0] = v.v[pos];
1607 		P[1] = v.v[pos];
1608 		P[2] = v.v[pos];
1609 		P[3] = v.v[pos];
1610 
1611 		const float X = pSize * P[0].w * data.halfPixelX[0];
1612 		const float Y = pSize * P[0].w * data.halfPixelY[0];
1613 
1614 		P[0].x -= X;
1615 		P[0].y += Y;
1616 		C[0] = clipper->computeClipFlags(P[0]);
1617 
1618 		P[1].x += X;
1619 		P[1].y += Y;
1620 		C[1] = clipper->computeClipFlags(P[1]);
1621 
1622 		P[2].x += X;
1623 		P[2].y -= Y;
1624 		C[2] = clipper->computeClipFlags(P[2]);
1625 
1626 		P[3].x -= X;
1627 		P[3].y -= Y;
1628 		C[3] = clipper->computeClipFlags(P[3]);
1629 
1630 		triangle.v1 = triangle.v0;
1631 		triangle.v2 = triangle.v0;
1632 
1633 		triangle.v1.X += iround(16 * 0.5f * pSize);
1634 		triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1635 
1636 		Polygon polygon(P, 4);
1637 
1638 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1639 		{
1640 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1641 
1642 			if(clipFlagsOr != Clipper::CLIP_FINITE)
1643 			{
1644 				if(!clipper->clip(polygon, clipFlagsOr, draw))
1645 				{
1646 					return false;
1647 				}
1648 			}
1649 
1650 			return setupRoutine(&primitive, &triangle, &polygon, &data);
1651 		}
1652 
1653 		return false;
1654 	}
1655 
	// Allocates the per-unit batch buffers and starts the worker threads.
	//
	// unitCount and clusterCount are both set to ceilPow2(threadCount). Every unit
	// gets a triangle batch and a primitive batch of batchSize entries; every
	// thread gets a VertexTask, a resume/suspend event pair and a Thread running
	// threadFunction.
	void Renderer::initializeThreads()
	{
		unitCount = ceilPow2(threadCount);
		clusterCount = ceilPow2(threadCount);

		for(int i = 0; i < unitCount; i++)
		{
			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
		}

		for(int i = 0; i < threadCount; i++)
		{
			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
			vertexTask[i]->vertexCache.drawCall = -1;

			task[i].type = Task::SUSPEND;

			resume[i] = new Event();
			suspend[i] = new Event();

			// NOTE(review): 'parameters' lives on this stack frame and only its
			// address is handed to the new thread. The wait on suspend[i] below
			// presumably keeps it alive until the worker has read it - confirm
			// against threadFunction.
			Parameters parameters;
			parameters.threadIndex = i;
			parameters.renderer = this;

			exitThreads = false;
			worker[i] = new Thread(threadFunction, &parameters);

			// Wait for the worker's suspend event, then signal it again
			suspend[i]->wait();
			suspend[i]->signal();
		}
	}
1688 
	// Stops and destroys the worker threads and frees the buffers allocated by
	// initializeThreads().
	//
	// Polls threadsAwake (sleeping between checks) until it reaches zero, then for
	// each existing worker sets exitThreads, signals its resume event, joins it and
	// deletes the thread and its events. All freed pointers are reset to 0.
	void Renderer::terminateThreads()
	{
		while(threadsAwake != 0)
		{
			Thread::sleep(1);
		}

		for(int thread = 0; thread < threadCount; thread++)
		{
			if(worker[thread])
			{
				// Set the exit flag before waking the worker so that it can observe it
				exitThreads = true;
				resume[thread]->signal();
				worker[thread]->join();

				delete worker[thread];
				worker[thread] = 0;
				delete resume[thread];
				resume[thread] = 0;
				delete suspend[thread];
				suspend[thread] = 0;
			}

			// Freed even when no worker exists for this index
			deallocate(vertexTask[thread]);
			vertexTask[thread] = 0;
		}

		// NOTE(review): a fixed 16 entries are freed here, whereas
		// initializeThreads() allocates unitCount entries - confirm that 16 matches
		// the capacity of triangleBatch and primitiveBatch.
		for(int i = 0; i < 16; i++)
		{
			deallocate(triangleBatch[i]);
			triangleBatch[i] = 0;

			deallocate(primitiveBatch[i]);
			primitiveBatch[i] = 0;
		}
	}
1725 
loadConstants(const VertexShader * vertexShader)1726 	void Renderer::loadConstants(const VertexShader *vertexShader)
1727 	{
1728 		size_t count = vertexShader->getLength();
1729 
1730 		for(size_t i = 0; i < count; i++)
1731 		{
1732 			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1733 
1734 			if(instruction->opcode == Shader::OPCODE_DEF)
1735 			{
1736 				int index = instruction->dst.index;
1737 				float value[4];
1738 
1739 				value[0] = instruction->src[0].value[0];
1740 				value[1] = instruction->src[0].value[1];
1741 				value[2] = instruction->src[0].value[2];
1742 				value[3] = instruction->src[0].value[3];
1743 
1744 				setVertexShaderConstantF(index, value);
1745 			}
1746 			else if(instruction->opcode == Shader::OPCODE_DEFI)
1747 			{
1748 				int index = instruction->dst.index;
1749 				int integer[4];
1750 
1751 				integer[0] = instruction->src[0].integer[0];
1752 				integer[1] = instruction->src[0].integer[1];
1753 				integer[2] = instruction->src[0].integer[2];
1754 				integer[3] = instruction->src[0].integer[3];
1755 
1756 				setVertexShaderConstantI(index, integer);
1757 			}
1758 			else if(instruction->opcode == Shader::OPCODE_DEFB)
1759 			{
1760 				int index = instruction->dst.index;
1761 				int boolean = instruction->src[0].boolean[0];
1762 
1763 				setVertexShaderConstantB(index, &boolean);
1764 			}
1765 		}
1766 	}
1767 
loadConstants(const PixelShader * pixelShader)1768 	void Renderer::loadConstants(const PixelShader *pixelShader)
1769 	{
1770 		if(!pixelShader) return;
1771 
1772 		size_t count = pixelShader->getLength();
1773 
1774 		for(size_t i = 0; i < count; i++)
1775 		{
1776 			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
1777 
1778 			if(instruction->opcode == Shader::OPCODE_DEF)
1779 			{
1780 				int index = instruction->dst.index;
1781 				float value[4];
1782 
1783 				value[0] = instruction->src[0].value[0];
1784 				value[1] = instruction->src[0].value[1];
1785 				value[2] = instruction->src[0].value[2];
1786 				value[3] = instruction->src[0].value[3];
1787 
1788 				setPixelShaderConstantF(index, value);
1789 			}
1790 			else if(instruction->opcode == Shader::OPCODE_DEFI)
1791 			{
1792 				int index = instruction->dst.index;
1793 				int integer[4];
1794 
1795 				integer[0] = instruction->src[0].integer[0];
1796 				integer[1] = instruction->src[0].integer[1];
1797 				integer[2] = instruction->src[0].integer[2];
1798 				integer[3] = instruction->src[0].integer[3];
1799 
1800 				setPixelShaderConstantI(index, integer);
1801 			}
1802 			else if(instruction->opcode == Shader::OPCODE_DEFB)
1803 			{
1804 				int index = instruction->dst.index;
1805 				int boolean = instruction->src[0].boolean[0];
1806 
1807 				setPixelShaderConstantB(index, &boolean);
1808 			}
1809 		}
1810 	}
1811 
	// Stores 'indexBuffer' as the context's index buffer resource.
	void Renderer::setIndexBuffer(Resource *indexBuffer)
	{
		context->indexBuffer = indexBuffer;
	}
1816 
	// Stores 'mask' as the context's sample mask.
	void Renderer::setMultiSampleMask(unsigned int mask)
	{
		context->sampleMask = mask;
	}
1821 
	// Writes the global sw::transparencyAntialiasing setting. This is a
	// namespace-level variable rather than state stored in this renderer.
	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
	{
		sw::transparencyAntialiasing = transparencyAntialiasing;
	}
1826 
isReadWriteTexture(int sampler)1827 	bool Renderer::isReadWriteTexture(int sampler)
1828 	{
1829 		for(int index = 0; index < RENDERTARGETS; index++)
1830 		{
1831 			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
1832 			{
1833 				return true;
1834 			}
1835 		}
1836 
1837 		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
1838 		{
1839 			return true;
1840 		}
1841 
1842 		return false;
1843 	}
1844 
updateClipper()1845 	void Renderer::updateClipper()
1846 	{
1847 		if(updateClipPlanes)
1848 		{
1849 			if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
1850 			if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
1851 			if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
1852 			if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
1853 			if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
1854 			if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
1855 
1856 			updateClipPlanes = false;
1857 		}
1858 	}
1859 
	// Stores 'resource' as the texture resource of the given sampler slot in the
	// context. Asserts that 'sampler' is below TOTAL_IMAGE_UNITS.
	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
	{
		ASSERT(sampler < TOTAL_IMAGE_UNITS);

		context->texture[sampler] = resource;
	}
1866 
	// Forwards the surface for one face and mipmap level to the context's sampler
	// at index 'sampler'. Asserts that 'sampler' is below TOTAL_IMAGE_UNITS, 'face'
	// is below 6 and 'level' is below MIPMAP_LEVELS.
	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
	{
		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);

		context->sampler[sampler].setTextureLevel(face, level, surface, type);
	}
1873 
setTextureFilter(SamplerType type,int sampler,FilterType textureFilter)1874 	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
1875 	{
1876 		if(type == SAMPLER_PIXEL)
1877 		{
1878 			PixelProcessor::setTextureFilter(sampler, textureFilter);
1879 		}
1880 		else
1881 		{
1882 			VertexProcessor::setTextureFilter(sampler, textureFilter);
1883 		}
1884 	}
1885 
setMipmapFilter(SamplerType type,int sampler,MipmapType mipmapFilter)1886 	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
1887 	{
1888 		if(type == SAMPLER_PIXEL)
1889 		{
1890 			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
1891 		}
1892 		else
1893 		{
1894 			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
1895 		}
1896 	}
1897 
setGatherEnable(SamplerType type,int sampler,bool enable)1898 	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
1899 	{
1900 		if(type == SAMPLER_PIXEL)
1901 		{
1902 			PixelProcessor::setGatherEnable(sampler, enable);
1903 		}
1904 		else
1905 		{
1906 			VertexProcessor::setGatherEnable(sampler, enable);
1907 		}
1908 	}
1909 
setAddressingModeU(SamplerType type,int sampler,AddressingMode addressMode)1910 	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
1911 	{
1912 		if(type == SAMPLER_PIXEL)
1913 		{
1914 			PixelProcessor::setAddressingModeU(sampler, addressMode);
1915 		}
1916 		else
1917 		{
1918 			VertexProcessor::setAddressingModeU(sampler, addressMode);
1919 		}
1920 	}
1921 
setAddressingModeV(SamplerType type,int sampler,AddressingMode addressMode)1922 	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
1923 	{
1924 		if(type == SAMPLER_PIXEL)
1925 		{
1926 			PixelProcessor::setAddressingModeV(sampler, addressMode);
1927 		}
1928 		else
1929 		{
1930 			VertexProcessor::setAddressingModeV(sampler, addressMode);
1931 		}
1932 	}
1933 
setAddressingModeW(SamplerType type,int sampler,AddressingMode addressMode)1934 	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
1935 	{
1936 		if(type == SAMPLER_PIXEL)
1937 		{
1938 			PixelProcessor::setAddressingModeW(sampler, addressMode);
1939 		}
1940 		else
1941 		{
1942 			VertexProcessor::setAddressingModeW(sampler, addressMode);
1943 		}
1944 	}
1945 
setReadSRGB(SamplerType type,int sampler,bool sRGB)1946 	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
1947 	{
1948 		if(type == SAMPLER_PIXEL)
1949 		{
1950 			PixelProcessor::setReadSRGB(sampler, sRGB);
1951 		}
1952 		else
1953 		{
1954 			VertexProcessor::setReadSRGB(sampler, sRGB);
1955 		}
1956 	}
1957 
setMipmapLOD(SamplerType type,int sampler,float bias)1958 	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
1959 	{
1960 		if(type == SAMPLER_PIXEL)
1961 		{
1962 			PixelProcessor::setMipmapLOD(sampler, bias);
1963 		}
1964 		else
1965 		{
1966 			VertexProcessor::setMipmapLOD(sampler, bias);
1967 		}
1968 	}
1969 
setBorderColor(SamplerType type,int sampler,const Color<float> & borderColor)1970 	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
1971 	{
1972 		if(type == SAMPLER_PIXEL)
1973 		{
1974 			PixelProcessor::setBorderColor(sampler, borderColor);
1975 		}
1976 		else
1977 		{
1978 			VertexProcessor::setBorderColor(sampler, borderColor);
1979 		}
1980 	}
1981 
setMaxAnisotropy(SamplerType type,int sampler,float maxAnisotropy)1982 	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
1983 	{
1984 		if(type == SAMPLER_PIXEL)
1985 		{
1986 			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
1987 		}
1988 		else
1989 		{
1990 			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
1991 		}
1992 	}
1993 
setHighPrecisionFiltering(SamplerType type,int sampler,bool highPrecisionFiltering)1994 	void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
1995 	{
1996 		if(type == SAMPLER_PIXEL)
1997 		{
1998 			PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
1999 		}
2000 		else
2001 		{
2002 			VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2003 		}
2004 	}
2005 
setSwizzleR(SamplerType type,int sampler,SwizzleType swizzleR)2006 	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2007 	{
2008 		if(type == SAMPLER_PIXEL)
2009 		{
2010 			PixelProcessor::setSwizzleR(sampler, swizzleR);
2011 		}
2012 		else
2013 		{
2014 			VertexProcessor::setSwizzleR(sampler, swizzleR);
2015 		}
2016 	}
2017 
setSwizzleG(SamplerType type,int sampler,SwizzleType swizzleG)2018 	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2019 	{
2020 		if(type == SAMPLER_PIXEL)
2021 		{
2022 			PixelProcessor::setSwizzleG(sampler, swizzleG);
2023 		}
2024 		else
2025 		{
2026 			VertexProcessor::setSwizzleG(sampler, swizzleG);
2027 		}
2028 	}
2029 
setSwizzleB(SamplerType type,int sampler,SwizzleType swizzleB)2030 	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2031 	{
2032 		if(type == SAMPLER_PIXEL)
2033 		{
2034 			PixelProcessor::setSwizzleB(sampler, swizzleB);
2035 		}
2036 		else
2037 		{
2038 			VertexProcessor::setSwizzleB(sampler, swizzleB);
2039 		}
2040 	}
2041 
setSwizzleA(SamplerType type,int sampler,SwizzleType swizzleA)2042 	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2043 	{
2044 		if(type == SAMPLER_PIXEL)
2045 		{
2046 			PixelProcessor::setSwizzleA(sampler, swizzleA);
2047 		}
2048 		else
2049 		{
2050 			VertexProcessor::setSwizzleA(sampler, swizzleA);
2051 		}
2052 	}
2053 
setCompareFunc(SamplerType type,int sampler,CompareFunc compFunc)2054 	void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc)
2055 	{
2056 		if(type == SAMPLER_PIXEL)
2057 		{
2058 			PixelProcessor::setCompareFunc(sampler, compFunc);
2059 		}
2060 		else
2061 		{
2062 			VertexProcessor::setCompareFunc(sampler, compFunc);
2063 		}
2064 	}
2065 
setBaseLevel(SamplerType type,int sampler,int baseLevel)2066 	void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2067 	{
2068 		if(type == SAMPLER_PIXEL)
2069 		{
2070 			PixelProcessor::setBaseLevel(sampler, baseLevel);
2071 		}
2072 		else
2073 		{
2074 			VertexProcessor::setBaseLevel(sampler, baseLevel);
2075 		}
2076 	}
2077 
setMaxLevel(SamplerType type,int sampler,int maxLevel)2078 	void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2079 	{
2080 		if(type == SAMPLER_PIXEL)
2081 		{
2082 			PixelProcessor::setMaxLevel(sampler, maxLevel);
2083 		}
2084 		else
2085 		{
2086 			VertexProcessor::setMaxLevel(sampler, maxLevel);
2087 		}
2088 	}
2089 
setMinLod(SamplerType type,int sampler,float minLod)2090 	void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2091 	{
2092 		if(type == SAMPLER_PIXEL)
2093 		{
2094 			PixelProcessor::setMinLod(sampler, minLod);
2095 		}
2096 		else
2097 		{
2098 			VertexProcessor::setMinLod(sampler, minLod);
2099 		}
2100 	}
2101 
setMaxLod(SamplerType type,int sampler,float maxLod)2102 	void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2103 	{
2104 		if(type == SAMPLER_PIXEL)
2105 		{
2106 			PixelProcessor::setMaxLod(sampler, maxLod);
2107 		}
2108 		else
2109 		{
2110 			VertexProcessor::setMaxLod(sampler, maxLod);
2111 		}
2112 	}
2113 
	// Stores 'width' as the context's line width.
	void Renderer::setLineWidth(float width)
	{
		context->lineWidth = width;
	}
2118 
	// Stores 'bias' as the context's depth bias.
	void Renderer::setDepthBias(float bias)
	{
		context->depthBias = bias;
	}
2123 
	// Stores 'slopeBias' as the context's slope depth bias.
	void Renderer::setSlopeDepthBias(float slopeBias)
	{
		context->slopeDepthBias = slopeBias;
	}
2128 
	// Stores the rasterizer discard flag in the context.
	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
	{
		context->rasterizerDiscard = rasterizerDiscard;
	}
2133 
	// Stores 'shader' as the context's pixel shader and then uploads the constants
	// it defines through loadConstants(), which ignores a null shader.
	void Renderer::setPixelShader(const PixelShader *shader)
	{
		context->pixelShader = shader;

		loadConstants(shader);
	}
2140 
	// Stores 'shader' as the context's vertex shader and then uploads the
	// constants it defines through loadConstants().
	void Renderer::setVertexShader(const VertexShader *shader)
	{
		context->vertexShader = shader;

		loadConstants(shader);
	}
2147 
setPixelShaderConstantF(unsigned int index,const float value[4],unsigned int count)2148 	void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2149 	{
2150 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2151 		{
2152 			if(drawCall[i]->psDirtyConstF < index + count)
2153 			{
2154 				drawCall[i]->psDirtyConstF = index + count;
2155 			}
2156 		}
2157 
2158 		for(unsigned int i = 0; i < count; i++)
2159 		{
2160 			PixelProcessor::setFloatConstant(index + i, value);
2161 			value += 4;
2162 		}
2163 	}
2164 
setPixelShaderConstantI(unsigned int index,const int value[4],unsigned int count)2165 	void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2166 	{
2167 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2168 		{
2169 			if(drawCall[i]->psDirtyConstI < index + count)
2170 			{
2171 				drawCall[i]->psDirtyConstI = index + count;
2172 			}
2173 		}
2174 
2175 		for(unsigned int i = 0; i < count; i++)
2176 		{
2177 			PixelProcessor::setIntegerConstant(index + i, value);
2178 			value += 4;
2179 		}
2180 	}
2181 
setPixelShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2182 	void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2183 	{
2184 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2185 		{
2186 			if(drawCall[i]->psDirtyConstB < index + count)
2187 			{
2188 				drawCall[i]->psDirtyConstB = index + count;
2189 			}
2190 		}
2191 
2192 		for(unsigned int i = 0; i < count; i++)
2193 		{
2194 			PixelProcessor::setBooleanConstant(index + i, *boolean);
2195 			boolean++;
2196 		}
2197 	}
2198 
setVertexShaderConstantF(unsigned int index,const float value[4],unsigned int count)2199 	void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2200 	{
2201 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2202 		{
2203 			if(drawCall[i]->vsDirtyConstF < index + count)
2204 			{
2205 				drawCall[i]->vsDirtyConstF = index + count;
2206 			}
2207 		}
2208 
2209 		for(unsigned int i = 0; i < count; i++)
2210 		{
2211 			VertexProcessor::setFloatConstant(index + i, value);
2212 			value += 4;
2213 		}
2214 	}
2215 
setVertexShaderConstantI(unsigned int index,const int value[4],unsigned int count)2216 	void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2217 	{
2218 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2219 		{
2220 			if(drawCall[i]->vsDirtyConstI < index + count)
2221 			{
2222 				drawCall[i]->vsDirtyConstI = index + count;
2223 			}
2224 		}
2225 
2226 		for(unsigned int i = 0; i < count; i++)
2227 		{
2228 			VertexProcessor::setIntegerConstant(index + i, value);
2229 			value += 4;
2230 		}
2231 	}
2232 
setVertexShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2233 	void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2234 	{
2235 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2236 		{
2237 			if(drawCall[i]->vsDirtyConstB < index + count)
2238 			{
2239 				drawCall[i]->vsDirtyConstB = index + count;
2240 			}
2241 		}
2242 
2243 		for(unsigned int i = 0; i < count; i++)
2244 		{
2245 			VertexProcessor::setBooleanConstant(index + i, *boolean);
2246 			boolean++;
2247 		}
2248 	}
2249 
addQuery(Query * query)2250 	void Renderer::addQuery(Query *query)
2251 	{
2252 		queries.push_back(query);
2253 	}
2254 
removeQuery(Query * query)2255 	void Renderer::removeQuery(Query *query)
2256 	{
2257 		queries.remove(query);
2258 	}
2259 
2260 	#if PERF_HUD
getThreadCount()2261 		int Renderer::getThreadCount()
2262 		{
2263 			return threadCount;
2264 		}
2265 
getVertexTime(int thread)2266 		int64_t Renderer::getVertexTime(int thread)
2267 		{
2268 			return vertexTime[thread];
2269 		}
2270 
getSetupTime(int thread)2271 		int64_t Renderer::getSetupTime(int thread)
2272 		{
2273 			return setupTime[thread];
2274 		}
2275 
getPixelTime(int thread)2276 		int64_t Renderer::getPixelTime(int thread)
2277 		{
2278 			return pixelTime[thread];
2279 		}
2280 
resetTimers()2281 		void Renderer::resetTimers()
2282 		{
2283 			for(int thread = 0; thread < threadCount; thread++)
2284 			{
2285 				vertexTime[thread] = 0;
2286 				setupTime[thread] = 0;
2287 				pixelTime[thread] = 0;
2288 			}
2289 		}
2290 	#endif
2291 
setContext(const sw::Context & context)2292 	void Renderer::setContext(const sw::Context& context)
2293 	{
2294 		*(this->context) = context;
2295 	}
2296 
setViewport(const VkViewport & viewport)2297 	void Renderer::setViewport(const VkViewport &viewport)
2298 	{
2299 		this->viewport = viewport;
2300 	}
2301 
setScissor(const Rect & scissor)2302 	void Renderer::setScissor(const Rect &scissor)
2303 	{
2304 		this->scissor = scissor;
2305 	}
2306 
setClipFlags(int flags)2307 	void Renderer::setClipFlags(int flags)
2308 	{
2309 		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2310 	}
2311 
setClipPlane(unsigned int index,const float plane[4])2312 	void Renderer::setClipPlane(unsigned int index, const float plane[4])
2313 	{
2314 		if(index < MAX_CLIP_PLANES)
2315 		{
2316 			userPlane[index] = plane;
2317 		}
2318 		else ASSERT(false);
2319 
2320 		updateClipPlanes = true;
2321 	}
2322 
updateConfiguration(bool initialUpdate)2323 	void Renderer::updateConfiguration(bool initialUpdate)
2324 	{
2325 		bool newConfiguration = swiftConfig->hasNewConfiguration();
2326 
2327 		if(newConfiguration || initialUpdate)
2328 		{
2329 			terminateThreads();
2330 
2331 			SwiftConfig::Configuration configuration = {};
2332 			swiftConfig->getConfiguration(configuration);
2333 
2334 			precacheVertex = !newConfiguration && configuration.precache;
2335 			precacheSetup = !newConfiguration && configuration.precache;
2336 			precachePixel = !newConfiguration && configuration.precache;
2337 
2338 			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2339 			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2340 			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2341 
2342 			switch(configuration.textureSampleQuality)
2343 			{
2344 			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2345 			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2346 			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2347 			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2348 			}
2349 
2350 			switch(configuration.mipmapQuality)
2351 			{
2352 			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2353 			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2354 			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2355 			}
2356 
2357 			setPerspectiveCorrection(configuration.perspectiveCorrection);
2358 
2359 			switch(configuration.transcendentalPrecision)
2360 			{
2361 			case 0:
2362 				logPrecision = APPROXIMATE;
2363 				expPrecision = APPROXIMATE;
2364 				rcpPrecision = APPROXIMATE;
2365 				rsqPrecision = APPROXIMATE;
2366 				break;
2367 			case 1:
2368 				logPrecision = PARTIAL;
2369 				expPrecision = PARTIAL;
2370 				rcpPrecision = PARTIAL;
2371 				rsqPrecision = PARTIAL;
2372 				break;
2373 			case 2:
2374 				logPrecision = ACCURATE;
2375 				expPrecision = ACCURATE;
2376 				rcpPrecision = ACCURATE;
2377 				rsqPrecision = ACCURATE;
2378 				break;
2379 			case 3:
2380 				logPrecision = WHQL;
2381 				expPrecision = WHQL;
2382 				rcpPrecision = WHQL;
2383 				rsqPrecision = WHQL;
2384 				break;
2385 			case 4:
2386 				logPrecision = IEEE;
2387 				expPrecision = IEEE;
2388 				rcpPrecision = IEEE;
2389 				rsqPrecision = IEEE;
2390 				break;
2391 			default:
2392 				logPrecision = ACCURATE;
2393 				expPrecision = ACCURATE;
2394 				rcpPrecision = ACCURATE;
2395 				rsqPrecision = ACCURATE;
2396 				break;
2397 			}
2398 
2399 			switch(configuration.transparencyAntialiasing)
2400 			{
2401 			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2402 			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2403 			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2404 			}
2405 
2406 			switch(configuration.threadCount)
2407 			{
2408 			case -1: threadCount = CPUID::coreCount();        break;
2409 			case 0:  threadCount = CPUID::processAffinity();  break;
2410 			default: threadCount = configuration.threadCount; break;
2411 			}
2412 
2413 			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2414 			CPUID::setEnableSSSE3(configuration.enableSSSE3);
2415 			CPUID::setEnableSSE3(configuration.enableSSE3);
2416 			CPUID::setEnableSSE2(configuration.enableSSE2);
2417 			CPUID::setEnableSSE(configuration.enableSSE);
2418 
2419 			for(int pass = 0; pass < 10; pass++)
2420 			{
2421 				optimization[pass] = configuration.optimization[pass];
2422 			}
2423 
2424 			forceWindowed = configuration.forceWindowed;
2425 			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2426 			postBlendSRGB = configuration.postBlendSRGB;
2427 			exactColorRounding = configuration.exactColorRounding;
2428 			forceClearRegisters = configuration.forceClearRegisters;
2429 
2430 		#ifndef NDEBUG
2431 			minPrimitives = configuration.minPrimitives;
2432 			maxPrimitives = configuration.maxPrimitives;
2433 		#endif
2434 		}
2435 
2436 		if(!initialUpdate && !worker[0])
2437 		{
2438 			initializeThreads();
2439 		}
2440 	}
2441 }
2442