1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Renderer.hpp"
16 
17 #include "Clipper.hpp"
18 #include "Surface.hpp"
19 #include "Primitive.hpp"
20 #include "Polygon.hpp"
21 #include "Main/FrameBuffer.hpp"
22 #include "Main/SwiftConfig.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "Shader/Constants.hpp"
25 #include "Common/MutexLock.hpp"
26 #include "Common/CPUID.hpp"
27 #include "Common/Memory.hpp"
28 #include "Common/Resource.hpp"
29 #include "Common/Half.hpp"
30 #include "Common/Math.hpp"
31 #include "Common/Timer.hpp"
32 #include "Common/Debug.hpp"
33 
34 #undef max
35 
// Handed to the SwiftConfig constructor by the Renderer constructor.
bool disableServer{true};

#if !defined(NDEBUG)
// Debug-only limits: Renderer::draw() returns without drawing when its
// primitive count lies outside [minPrimitives, maxPrimitives].
unsigned int minPrimitives{1u};
unsigned int maxPrimitives{1u << 21};
#endif
42 
43 namespace sw
44 {
45 	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46 	extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47 	extern bool booleanFaceRegister;
48 	extern bool fullPixelPositionRegister;
49 	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
50 	extern bool secondaryColor;             // Specular lighting is applied after texturing
51 	extern bool colorsDefaultToZero;
52 
53 	extern bool forceWindowed;
54 	extern bool complementaryDepthBuffer;
55 	extern bool postBlendSRGB;
56 	extern bool exactColorRounding;
57 	extern TransparencyAntialiasing transparencyAntialiasing;
58 	extern bool forceClearRegisters;
59 
60 	extern bool precacheVertex;
61 	extern bool precacheSetup;
62 	extern bool precachePixel;
63 
64 	static const int batchSize = 128;
65 	AtomicInt threadCount(1);
66 	AtomicInt Renderer::unitCount(1);
67 	AtomicInt Renderer::clusterCount(1);
68 
69 	TranscendentalPrecision logPrecision = ACCURATE;
70 	TranscendentalPrecision expPrecision = ACCURATE;
71 	TranscendentalPrecision rcpPrecision = ACCURATE;
72 	TranscendentalPrecision rsqPrecision = ACCURATE;
73 	bool perspectiveCorrection = true;
74 
setGlobalRenderingSettings(Conventions conventions,bool exactColorRounding)75 	static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding)
76 	{
77 		static bool initialized = false;
78 
79 		if(!initialized)
80 		{
81 			sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
82 			sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
83 			sw::booleanFaceRegister = conventions.booleanFaceRegister;
84 			sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
85 			sw::leadingVertexFirst = conventions.leadingVertexFirst;
86 			sw::secondaryColor = conventions.secondaryColor;
87 			sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
88 			sw::exactColorRounding = exactColorRounding;
89 			initialized = true;
90 		}
91 	}
92 
93 	struct Parameters
94 	{
95 		Renderer *renderer;
96 		int threadIndex;
97 	};
98 
Query(Type type)99 	Query::Query(Type type) : building(false), data(0), type(type), reference(1)
100 	{
101 	}
102 
addRef()103 	void Query::addRef()
104 	{
105 		++reference; // Atomic
106 	}
107 
release()108 	void Query::release()
109 	{
110 		int ref = reference--; // Atomic
111 
112 		ASSERT(ref >= 0);
113 
114 		if(ref == 0)
115 		{
116 			delete this;
117 		}
118 	}
119 
DrawCall()120 	DrawCall::DrawCall()
121 	{
122 		queries = 0;
123 
124 		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
125 		vsDirtyConstI = 16;
126 		vsDirtyConstB = 16;
127 
128 		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
129 		psDirtyConstI = 16;
130 		psDirtyConstB = 16;
131 
132 		references = -1;
133 
134 		data = (DrawData*)allocate(sizeof(DrawData));
135 		data->constants = &constants;
136 	}
137 
~DrawCall()138 	DrawCall::~DrawCall()
139 	{
140 		delete queries;
141 
142 		deallocate(data);
143 	}
144 
Renderer(Context * context,Conventions conventions,bool exactColorRounding)145 	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
146 	{
147 		setGlobalRenderingSettings(conventions, exactColorRounding);
148 
149 		setRenderTarget(0, 0);
150 		clipper = new Clipper(symmetricNormalizedDepth);
151 		blitter = new Blitter;
152 
153 		updateViewMatrix = true;
154 		updateBaseMatrix = true;
155 		updateProjectionMatrix = true;
156 		updateClipPlanes = true;
157 
158 		#if PERF_HUD
159 			resetTimers();
160 		#endif
161 
162 		for(int i = 0; i < 16; i++)
163 		{
164 			vertexTask[i] = 0;
165 
166 			worker[i] = 0;
167 			resume[i] = 0;
168 			suspend[i] = 0;
169 		}
170 
171 		threadsAwake = 0;
172 		resumeApp = new Event();
173 
174 		currentDraw = 0;
175 		nextDraw = 0;
176 
177 		qHead = 0;
178 		qSize = 0;
179 
180 		for(int i = 0; i < 16; i++)
181 		{
182 			triangleBatch[i] = 0;
183 			primitiveBatch[i] = 0;
184 		}
185 
186 		for(int draw = 0; draw < DRAW_COUNT; draw++)
187 		{
188 			drawCall[draw] = new DrawCall();
189 			drawList[draw] = drawCall[draw];
190 		}
191 
192 		for(int unit = 0; unit < 16; unit++)
193 		{
194 			primitiveProgress[unit].init();
195 		}
196 
197 		for(int cluster = 0; cluster < 16; cluster++)
198 		{
199 			pixelProgress[cluster].init();
200 		}
201 
202 		clipFlags = 0;
203 
204 		swiftConfig = new SwiftConfig(disableServer);
205 		updateConfiguration(true);
206 
207 		sync = new Resource(0);
208 	}
209 
~Renderer()210 	Renderer::~Renderer()
211 	{
212 		sync->lock(EXCLUSIVE);
213 		sync->destruct();
214 		terminateThreads();
215 		sync->unlock();
216 
217 		delete clipper;
218 		clipper = nullptr;
219 
220 		delete blitter;
221 		blitter = nullptr;
222 
223 		delete resumeApp;
224 		resumeApp = nullptr;
225 
226 		for(int draw = 0; draw < DRAW_COUNT; draw++)
227 		{
228 			delete drawCall[draw];
229 			drawCall[draw] = nullptr;
230 		}
231 
232 		delete swiftConfig;
233 		swiftConfig = nullptr;
234 	}
235 
236 	// This object has to be mem aligned
operator new(size_t size)237 	void* Renderer::operator new(size_t size)
238 	{
239 		ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
240 		return sw::allocate(sizeof(Renderer), 16);
241 	}
242 
operator delete(void * mem)243 	void Renderer::operator delete(void * mem)
244 	{
245 		sw::deallocate(mem);
246 	}
247 
draw(DrawType drawType,unsigned int indexOffset,unsigned int count,bool update)248 	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
249 	{
250 		#ifndef NDEBUG
251 			if(count < minPrimitives || count > maxPrimitives)
252 			{
253 				return;
254 			}
255 		#endif
256 
257 		context->drawType = drawType;
258 
259 		updateConfiguration();
260 		updateClipper();
261 
262 		int ss = context->getSuperSampleCount();
263 		int ms = context->getMultiSampleCount();
264 		bool requiresSync = false;
265 
266 		for(int q = 0; q < ss; q++)
267 		{
268 			unsigned int oldMultiSampleMask = context->multiSampleMask;
269 			context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
270 
271 			if(!context->multiSampleMask)
272 			{
273 				continue;
274 			}
275 
276 			sync->lock(sw::PRIVATE);
277 
278 			if(update || oldMultiSampleMask != context->multiSampleMask)
279 			{
280 				vertexState = VertexProcessor::update(drawType);
281 				setupState = SetupProcessor::update();
282 				pixelState = PixelProcessor::update();
283 
284 				vertexRoutine = VertexProcessor::routine(vertexState);
285 				setupRoutine = SetupProcessor::routine(setupState);
286 				pixelRoutine = PixelProcessor::routine(pixelState);
287 			}
288 
289 			int batch = batchSize / ms;
290 
291 			int (Renderer::*setupPrimitives)(int batch, int count);
292 
293 			if(context->isDrawTriangle())
294 			{
295 				switch(context->fillMode)
296 				{
297 				case FILL_SOLID:
298 					setupPrimitives = &Renderer::setupSolidTriangles;
299 					break;
300 				case FILL_WIREFRAME:
301 					setupPrimitives = &Renderer::setupWireframeTriangle;
302 					batch = 1;
303 					break;
304 				case FILL_VERTEX:
305 					setupPrimitives = &Renderer::setupVertexTriangle;
306 					batch = 1;
307 					break;
308 				default:
309 					ASSERT(false);
310 					return;
311 				}
312 			}
313 			else if(context->isDrawLine())
314 			{
315 				setupPrimitives = &Renderer::setupLines;
316 			}
317 			else   // Point draw
318 			{
319 				setupPrimitives = &Renderer::setupPoints;
320 			}
321 
322 			DrawCall *draw = nullptr;
323 
324 			do
325 			{
326 				for(int i = 0; i < DRAW_COUNT; i++)
327 				{
328 					if(drawCall[i]->references == -1)
329 					{
330 						draw = drawCall[i];
331 						drawList[nextDraw & DRAW_COUNT_BITS] = draw;
332 
333 						break;
334 					}
335 				}
336 
337 				if(!draw)
338 				{
339 					resumeApp->wait();
340 				}
341 			}
342 			while(!draw);
343 
344 			DrawData *data = draw->data;
345 
346 			if(queries.size() != 0)
347 			{
348 				draw->queries = new std::list<Query*>();
349 				bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
350 				for(auto &query : queries)
351 				{
352 					if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
353 					{
354 						query->addRef();
355 						draw->queries->push_back(query);
356 					}
357 				}
358 			}
359 
360 			draw->drawType = drawType;
361 			draw->batchSize = batch;
362 
363 			draw->vertexRoutine = vertexRoutine;
364 			draw->setupRoutine = setupRoutine;
365 			draw->pixelRoutine = pixelRoutine;
366 			draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
367 			draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
368 			draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
369 			draw->setupPrimitives = setupPrimitives;
370 			draw->setupState = setupState;
371 
372 			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
373 			{
374 				draw->vertexStream[i] = context->input[i].resource;
375 				data->input[i] = context->input[i].buffer;
376 				data->stride[i] = context->input[i].stride;
377 
378 				if(draw->vertexStream[i])
379 				{
380 					draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
381 				}
382 			}
383 
384 			if(context->indexBuffer)
385 			{
386 				data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
387 			}
388 
389 			draw->indexBuffer = context->indexBuffer;
390 
391 			for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
392 			{
393 				draw->texture[sampler] = 0;
394 			}
395 
396 			for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
397 			{
398 				if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
399 				{
400 					draw->texture[sampler] = context->texture[sampler];
401 					draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
402 
403 					data->mipmap[sampler] = context->sampler[sampler].getTextureData();
404 
405 					requiresSync |= context->sampler[sampler].requiresSync();
406 				}
407 			}
408 
409 			if(context->pixelShader)
410 			{
411 				if(draw->psDirtyConstF)
412 				{
413 					memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
414 					memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
415 					draw->psDirtyConstF = 0;
416 				}
417 
418 				if(draw->psDirtyConstI)
419 				{
420 					memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
421 					draw->psDirtyConstI = 0;
422 				}
423 
424 				if(draw->psDirtyConstB)
425 				{
426 					memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
427 					draw->psDirtyConstB = 0;
428 				}
429 
430 				PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
431 			}
432 			else
433 			{
434 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
435 				{
436 					draw->pUniformBuffers[i] = nullptr;
437 				}
438 			}
439 
440 			if(context->pixelShaderModel() <= 0x0104)
441 			{
442 				for(int stage = 0; stage < 8; stage++)
443 				{
444 					if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
445 					{
446 						data->textureStage[stage] = context->textureStage[stage].uniforms;
447 					}
448 					else break;
449 				}
450 			}
451 
452 			if(context->vertexShader)
453 			{
454 				if(context->vertexShader->getShaderModel() >= 0x0300)
455 				{
456 					for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
457 					{
458 						if(vertexState.sampler[sampler].textureType != TEXTURE_NULL)
459 						{
460 							draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
461 							draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
462 
463 							data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
464 
465 							requiresSync |= context->sampler[TEXTURE_IMAGE_UNITS + sampler].requiresSync();
466 						}
467 					}
468 				}
469 
470 				if(draw->vsDirtyConstF)
471 				{
472 					memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
473 					draw->vsDirtyConstF = 0;
474 				}
475 
476 				if(draw->vsDirtyConstI)
477 				{
478 					memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
479 					draw->vsDirtyConstI = 0;
480 				}
481 
482 				if(draw->vsDirtyConstB)
483 				{
484 					memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
485 					draw->vsDirtyConstB = 0;
486 				}
487 
488 				if(context->vertexShader->isInstanceIdDeclared())
489 				{
490 					data->instanceID = context->instanceID;
491 				}
492 
493 				VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
494 				VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
495 			}
496 			else
497 			{
498 				data->ff = ff;
499 
500 				draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
501 				draw->vsDirtyConstI = 16;
502 				draw->vsDirtyConstB = 16;
503 
504 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
505 				{
506 					draw->vUniformBuffers[i] = nullptr;
507 				}
508 
509 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
510 				{
511 					draw->transformFeedbackBuffers[i] = nullptr;
512 				}
513 			}
514 
515 			if(pixelState.stencilActive)
516 			{
517 				data->stencil[0] = stencil;
518 				data->stencil[1] = stencilCCW;
519 			}
520 
521 			if(pixelState.fogActive)
522 			{
523 				data->fog = fog;
524 			}
525 
526 			if(setupState.isDrawPoint)
527 			{
528 				data->point = point;
529 			}
530 
531 			data->lineWidth = context->lineWidth;
532 
533 			data->factor = factor;
534 
535 			if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
536 			{
537 				float ref = context->alphaReference * (1.0f / 255.0f);
538 				float margin = sw::min(ref, 1.0f - ref);
539 
540 				if(ms == 4)
541 				{
542 					data->a2c0 = replicate(ref - margin * 0.6f);
543 					data->a2c1 = replicate(ref - margin * 0.2f);
544 					data->a2c2 = replicate(ref + margin * 0.2f);
545 					data->a2c3 = replicate(ref + margin * 0.6f);
546 				}
547 				else if(ms == 2)
548 				{
549 					data->a2c0 = replicate(ref - margin * 0.3f);
550 					data->a2c1 = replicate(ref + margin * 0.3f);
551 				}
552 				else ASSERT(false);
553 			}
554 
555 			if(pixelState.occlusionEnabled)
556 			{
557 				for(int cluster = 0; cluster < clusterCount; cluster++)
558 				{
559 					data->occlusion[cluster] = 0;
560 				}
561 			}
562 
563 			#if PERF_PROFILE
564 				for(int cluster = 0; cluster < clusterCount; cluster++)
565 				{
566 					for(int i = 0; i < PERF_TIMERS; i++)
567 					{
568 						data->cycles[i][cluster] = 0;
569 					}
570 				}
571 			#endif
572 
573 			// Viewport
574 			{
575 				float W = 0.5f * viewport.width;
576 				float H = 0.5f * viewport.height;
577 				float X0 = viewport.x0 + W;
578 				float Y0 = viewport.y0 + H;
579 				float N = viewport.minZ;
580 				float F = viewport.maxZ;
581 				float Z = F - N;
582 
583 				if(context->isDrawTriangle(false))
584 				{
585 					N += context->depthBias;
586 				}
587 
588 				if(complementaryDepthBuffer)
589 				{
590 					Z = -Z;
591 					N = 1 - N;
592 				}
593 
594 				static const float X[5][16] =   // Fragment offsets
595 				{
596 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
597 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
598 					{-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
599 					{+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
600 					{+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
601 				};
602 
603 				static const float Y[5][16] =   // Fragment offsets
604 				{
605 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
606 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
607 					{-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
608 					{-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
609 					{-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
610 				};
611 
612 				int s = sw::log2(ss);
613 
614 				data->Wx16 = replicate(W * 16);
615 				data->Hx16 = replicate(H * 16);
616 				data->X0x16 = replicate(X0 * 16 - 8);
617 				data->Y0x16 = replicate(Y0 * 16 - 8);
618 				data->XXXX = replicate(X[s][q] / W);
619 				data->YYYY = replicate(Y[s][q] / H);
620 				data->halfPixelX = replicate(0.5f / W);
621 				data->halfPixelY = replicate(0.5f / H);
622 				data->viewportHeight = abs(viewport.height);
623 				data->slopeDepthBias = context->slopeDepthBias;
624 				data->depthRange = Z;
625 				data->depthNear = N;
626 				draw->clipFlags = clipFlags;
627 
628 				if(clipFlags)
629 				{
630 					if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
631 					if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
632 					if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
633 					if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
634 					if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
635 					if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
636 				}
637 			}
638 
639 			// Target
640 			{
641 				for(int index = 0; index < RENDERTARGETS; index++)
642 				{
643 					draw->renderTarget[index] = context->renderTarget[index];
644 
645 					if(draw->renderTarget[index])
646 					{
647 						unsigned int layer = context->renderTargetLayer[index];
648 						requiresSync |= context->renderTarget[index]->requiresSync();
649 						data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
650 						data->colorBuffer[index] += q * ms * context->renderTarget[index]->getSliceB(true);
651 						data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
652 						data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
653 					}
654 				}
655 
656 				draw->depthBuffer = context->depthBuffer;
657 				draw->stencilBuffer = context->stencilBuffer;
658 
659 				if(draw->depthBuffer)
660 				{
661 					unsigned int layer = context->depthBufferLayer;
662 					requiresSync |= context->depthBuffer->requiresSync();
663 					data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
664 					data->depthBuffer += q * ms * context->depthBuffer->getSliceB(true);
665 					data->depthPitchB = context->depthBuffer->getInternalPitchB();
666 					data->depthSliceB = context->depthBuffer->getInternalSliceB();
667 				}
668 
669 				if(draw->stencilBuffer)
670 				{
671 					unsigned int layer = context->stencilBufferLayer;
672 					requiresSync |= context->stencilBuffer->requiresSync();
673 					data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED);
674 					data->stencilBuffer += q * ms * context->stencilBuffer->getSliceB(true);
675 					data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
676 					data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
677 				}
678 			}
679 
680 			// Scissor
681 			{
682 				data->scissorX0 = scissor.x0;
683 				data->scissorX1 = scissor.x1;
684 				data->scissorY0 = scissor.y0;
685 				data->scissorY1 = scissor.y1;
686 			}
687 
688 			draw->primitive = 0;
689 			draw->count = count;
690 
691 			draw->references = (count + batch - 1) / batch;
692 
693 			schedulerMutex.lock();
694 			++nextDraw; // Atomic
695 			schedulerMutex.unlock();
696 
697 			#ifndef NDEBUG
698 			if(threadCount == 1)   // Use main thread for draw execution
699 			{
700 				threadsAwake = 1;
701 				task[0].type = Task::RESUME;
702 
703 				taskLoop(0);
704 			}
705 			else
706 			#endif
707 			{
708 				if(!threadsAwake)
709 				{
710 					suspend[0]->wait();
711 
712 					threadsAwake = 1;
713 					task[0].type = Task::RESUME;
714 
715 					resume[0]->signal();
716 				}
717 			}
718 		}
719 
720 		// TODO(sugoi): This is a temporary brute-force workaround to ensure IOSurface synchronization.
721 		if(requiresSync)
722 		{
723 			synchronize();
724 		}
725 	}
726 
clear(void * value,Format format,Surface * dest,const Rect & clearRect,unsigned int rgbaMask)727 	void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
728 	{
729 		blitter->clear(value, format, dest, clearRect, rgbaMask);
730 	}
731 
blit(Surface * source,const SliceRectF & sRect,Surface * dest,const SliceRect & dRect,bool filter,bool isStencil,bool sRGBconversion)732 	void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion)
733 	{
734 		blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion});
735 	}
736 
blit3D(Surface * source,Surface * dest)737 	void Renderer::blit3D(Surface *source, Surface *dest)
738 	{
739 		blitter->blit3D(source, dest);
740 	}
741 
threadFunction(void * parameters)742 	void Renderer::threadFunction(void *parameters)
743 	{
744 		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
745 		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
746 
747 		if(logPrecision < IEEE)
748 		{
749 			CPUID::setFlushToZero(true);
750 			CPUID::setDenormalsAreZero(true);
751 		}
752 
753 		renderer->threadLoop(threadIndex);
754 	}
755 
threadLoop(int threadIndex)756 	void Renderer::threadLoop(int threadIndex)
757 	{
758 		while(!exitThreads)
759 		{
760 			taskLoop(threadIndex);
761 
762 			suspend[threadIndex]->signal();
763 			resume[threadIndex]->wait();
764 		}
765 	}
766 
taskLoop(int threadIndex)767 	void Renderer::taskLoop(int threadIndex)
768 	{
769 		while(task[threadIndex].type != Task::SUSPEND)
770 		{
771 			scheduleTask(threadIndex);
772 			executeTask(threadIndex);
773 		}
774 	}
775 
findAvailableTasks()776 	void Renderer::findAvailableTasks()
777 	{
778 		// Find pixel tasks
779 		for(int cluster = 0; cluster < clusterCount; cluster++)
780 		{
781 			if(!pixelProgress[cluster].executing)
782 			{
783 				for(int unit = 0; unit < unitCount; unit++)
784 				{
785 					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
786 					{
787 						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
788 						{
789 							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
790 							{
791 								Task &task = taskQueue[qHead];
792 								task.type = Task::PIXELS;
793 								task.primitiveUnit = unit;
794 								task.pixelCluster = cluster;
795 
796 								pixelProgress[cluster].executing = true;
797 
798 								// Commit to the task queue
799 								qHead = (qHead + 1) & TASK_COUNT_BITS;
800 								qSize++;
801 
802 								break;
803 							}
804 						}
805 					}
806 				}
807 			}
808 		}
809 
810 		// Find primitive tasks
811 		if(currentDraw == nextDraw)
812 		{
813 			return;   // No more primitives to process
814 		}
815 
816 		for(int unit = 0; unit < unitCount; unit++)
817 		{
818 			DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS];
819 
820 			int primitive = draw->primitive;
821 			int count = draw->count;
822 
823 			if(primitive >= count)
824 			{
825 				++currentDraw; // Atomic
826 
827 				if(currentDraw == nextDraw)
828 				{
829 					return;   // No more primitives to process
830 				}
831 
832 				draw = drawList[currentDraw & DRAW_COUNT_BITS];
833 			}
834 
835 			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
836 			{
837 				primitive = draw->primitive;
838 				count = draw->count;
839 				int batch = draw->batchSize;
840 
841 				primitiveProgress[unit].drawCall = currentDraw;
842 				primitiveProgress[unit].firstPrimitive = primitive;
843 				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
844 
845 				draw->primitive += batch;
846 
847 				Task &task = taskQueue[qHead];
848 				task.type = Task::PRIMITIVES;
849 				task.primitiveUnit = unit;
850 
851 				primitiveProgress[unit].references = -1;
852 
853 				// Commit to the task queue
854 				qHead = (qHead + 1) & TASK_COUNT_BITS;
855 				qSize++;
856 			}
857 		}
858 	}
859 
scheduleTask(int threadIndex)860 	void Renderer::scheduleTask(int threadIndex)
861 	{
862 		schedulerMutex.lock();
863 
864 		int curThreadsAwake = threadsAwake;
865 
866 		if((int)qSize < threadCount - curThreadsAwake + 1)
867 		{
868 			findAvailableTasks();
869 		}
870 
871 		if(qSize != 0)
872 		{
873 			task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS];
874 			qSize--;
875 
876 			if(curThreadsAwake != threadCount)
877 			{
878 				int wakeup = qSize - curThreadsAwake + 1;
879 
880 				for(int i = 0; i < threadCount && wakeup > 0; i++)
881 				{
882 					if(task[i].type == Task::SUSPEND)
883 					{
884 						suspend[i]->wait();
885 						task[i].type = Task::RESUME;
886 						resume[i]->signal();
887 
888 						++threadsAwake; // Atomic
889 						wakeup--;
890 					}
891 				}
892 			}
893 		}
894 		else
895 		{
896 			task[threadIndex].type = Task::SUSPEND;
897 
898 			--threadsAwake; // Atomic
899 		}
900 
901 		schedulerMutex.unlock();
902 	}
903 
executeTask(int threadIndex)904 	void Renderer::executeTask(int threadIndex)
905 	{
906 		#if PERF_HUD
907 			int64_t startTick = Timer::ticks();
908 		#endif
909 
910 		switch(task[threadIndex].type)
911 		{
912 		case Task::PRIMITIVES:
913 			{
914 				int unit = task[threadIndex].primitiveUnit;
915 
916 				int input = primitiveProgress[unit].firstPrimitive;
917 				int count = primitiveProgress[unit].primitiveCount;
918 				DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
919 				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
920 
921 				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
922 
923 				#if PERF_HUD
924 					int64_t time = Timer::ticks();
925 					vertexTime[threadIndex] += time - startTick;
926 					startTick = time;
927 				#endif
928 
929 				int visible = 0;
930 
931 				if(!draw->setupState.rasterizerDiscard)
932 				{
933 					visible = (this->*setupPrimitives)(unit, count);
934 				}
935 
936 				primitiveProgress[unit].visible = visible;
937 				primitiveProgress[unit].references = clusterCount;
938 
939 				#if PERF_HUD
940 					setupTime[threadIndex] += Timer::ticks() - startTick;
941 				#endif
942 			}
943 			break;
944 		case Task::PIXELS:
945 			{
946 				int unit = task[threadIndex].primitiveUnit;
947 				int visible = primitiveProgress[unit].visible;
948 
949 				if(visible > 0)
950 				{
951 					int cluster = task[threadIndex].pixelCluster;
952 					Primitive *primitive = primitiveBatch[unit];
953 					DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS];
954 					DrawData *data = draw->data;
955 					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
956 
957 					pixelRoutine(primitive, visible, cluster, data);
958 				}
959 
960 				finishRendering(task[threadIndex]);
961 
962 				#if PERF_HUD
963 					pixelTime[threadIndex] += Timer::ticks() - startTick;
964 				#endif
965 			}
966 			break;
967 		case Task::RESUME:
968 			break;
969 		case Task::SUSPEND:
970 			break;
971 		default:
972 			ASSERT(false);
973 		}
974 	}
975 
synchronize()976 	void Renderer::synchronize()
977 	{
978 		sync->lock(sw::PUBLIC);
979 		sync->unlock();
980 	}
981 
finishRendering(Task & pixelTask)982 	void Renderer::finishRendering(Task &pixelTask)
983 	{
984 		int unit = pixelTask.primitiveUnit;
985 		int cluster = pixelTask.pixelCluster;
986 
987 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
988 		DrawData &data = *draw.data;
989 		int primitive = primitiveProgress[unit].firstPrimitive;
990 		int count = primitiveProgress[unit].primitiveCount;
991 		int processedPrimitives = primitive + count;
992 
993 		pixelProgress[cluster].processedPrimitives = processedPrimitives;
994 
995 		if(pixelProgress[cluster].processedPrimitives >= draw.count)
996 		{
997 			++pixelProgress[cluster].drawCall; // Atomic
998 			pixelProgress[cluster].processedPrimitives = 0;
999 		}
1000 
1001 		int ref = primitiveProgress[unit].references--; // Atomic
1002 
1003 		if(ref == 0)
1004 		{
1005 			ref = draw.references--; // Atomic
1006 
1007 			if(ref == 0)
1008 			{
1009 				#if PERF_PROFILE
1010 					for(int cluster = 0; cluster < clusterCount; cluster++)
1011 					{
1012 						for(int i = 0; i < PERF_TIMERS; i++)
1013 						{
1014 							profiler.cycles[i] += data.cycles[i][cluster];
1015 						}
1016 					}
1017 				#endif
1018 
1019 				if(draw.queries)
1020 				{
1021 					for(auto &query : *(draw.queries))
1022 					{
1023 						switch(query->type)
1024 						{
1025 						case Query::FRAGMENTS_PASSED:
1026 							for(int cluster = 0; cluster < clusterCount; cluster++)
1027 							{
1028 								query->data += data.occlusion[cluster];
1029 							}
1030 							break;
1031 						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
1032 							query->data += processedPrimitives;
1033 							break;
1034 						default:
1035 							break;
1036 						}
1037 
1038 						query->release();
1039 					}
1040 
1041 					delete draw.queries;
1042 					draw.queries = 0;
1043 				}
1044 
1045 				for(int i = 0; i < RENDERTARGETS; i++)
1046 				{
1047 					if(draw.renderTarget[i])
1048 					{
1049 						draw.renderTarget[i]->unlockInternal();
1050 					}
1051 				}
1052 
1053 				if(draw.depthBuffer)
1054 				{
1055 					draw.depthBuffer->unlockInternal();
1056 				}
1057 
1058 				if(draw.stencilBuffer)
1059 				{
1060 					draw.stencilBuffer->unlockStencil();
1061 				}
1062 
1063 				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
1064 				{
1065 					if(draw.texture[i])
1066 					{
1067 						draw.texture[i]->unlock();
1068 					}
1069 				}
1070 
1071 				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
1072 				{
1073 					if(draw.vertexStream[i])
1074 					{
1075 						draw.vertexStream[i]->unlock();
1076 					}
1077 				}
1078 
1079 				if(draw.indexBuffer)
1080 				{
1081 					draw.indexBuffer->unlock();
1082 				}
1083 
1084 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1085 				{
1086 					if(draw.pUniformBuffers[i])
1087 					{
1088 						draw.pUniformBuffers[i]->unlock();
1089 					}
1090 					if(draw.vUniformBuffers[i])
1091 					{
1092 						draw.vUniformBuffers[i]->unlock();
1093 					}
1094 				}
1095 
1096 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1097 				{
1098 					if(draw.transformFeedbackBuffers[i])
1099 					{
1100 						draw.transformFeedbackBuffers[i]->unlock();
1101 					}
1102 				}
1103 
1104 				draw.vertexRoutine.reset();
1105 				draw.setupRoutine.reset();
1106 				draw.pixelRoutine.reset();
1107 
1108 				sync->unlock();
1109 
1110 				draw.references = -1;
1111 				resumeApp->signal();
1112 			}
1113 		}
1114 
1115 		pixelProgress[cluster].executing = false;
1116 	}
1117 
processPrimitiveVertices(int unit,unsigned int start,unsigned int triangleCount,unsigned int loop,int thread)1118 	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1119 	{
1120 		Triangle *triangle = triangleBatch[unit];
1121 		int primitiveDrawCall = primitiveProgress[unit].drawCall;
1122 		DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS];
1123 		DrawData *data = draw->data;
1124 		VertexTask *task = vertexTask[thread];
1125 
1126 		const void *indices = data->indices;
1127 		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1128 
1129 		if(task->vertexCache.drawCall != primitiveDrawCall)
1130 		{
1131 			task->vertexCache.clear();
1132 			task->vertexCache.drawCall = primitiveDrawCall;
1133 		}
1134 
1135 		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1136 
1137 		switch(draw->drawType)
1138 		{
1139 		case DRAW_POINTLIST:
1140 			{
1141 				unsigned int index = start;
1142 
1143 				for(unsigned int i = 0; i < triangleCount; i++)
1144 				{
1145 					batch[i][0] = index;
1146 					batch[i][1] = index;
1147 					batch[i][2] = index;
1148 
1149 					index += 1;
1150 				}
1151 			}
1152 			break;
1153 		case DRAW_LINELIST:
1154 			{
1155 				unsigned int index = 2 * start;
1156 
1157 				for(unsigned int i = 0; i < triangleCount; i++)
1158 				{
1159 					batch[i][0] = index + 0;
1160 					batch[i][1] = index + 1;
1161 					batch[i][2] = index + 1;
1162 
1163 					index += 2;
1164 				}
1165 			}
1166 			break;
1167 		case DRAW_LINESTRIP:
1168 			{
1169 				unsigned int index = start;
1170 
1171 				for(unsigned int i = 0; i < triangleCount; i++)
1172 				{
1173 					batch[i][0] = index + 0;
1174 					batch[i][1] = index + 1;
1175 					batch[i][2] = index + 1;
1176 
1177 					index += 1;
1178 				}
1179 			}
1180 			break;
1181 		case DRAW_LINELOOP:
1182 			{
1183 				unsigned int index = start;
1184 
1185 				for(unsigned int i = 0; i < triangleCount; i++)
1186 				{
1187 					batch[i][0] = (index + 0) % loop;
1188 					batch[i][1] = (index + 1) % loop;
1189 					batch[i][2] = (index + 1) % loop;
1190 
1191 					index += 1;
1192 				}
1193 			}
1194 			break;
1195 		case DRAW_TRIANGLELIST:
1196 			{
1197 				unsigned int index = 3 * start;
1198 
1199 				for(unsigned int i = 0; i < triangleCount; i++)
1200 				{
1201 					batch[i][0] = index + 0;
1202 					batch[i][1] = index + 1;
1203 					batch[i][2] = index + 2;
1204 
1205 					index += 3;
1206 				}
1207 			}
1208 			break;
1209 		case DRAW_TRIANGLESTRIP:
1210 			{
1211 				unsigned int index = start;
1212 
1213 				for(unsigned int i = 0; i < triangleCount; i++)
1214 				{
1215 					if(leadingVertexFirst)
1216 					{
1217 						batch[i][0] = index + 0;
1218 						batch[i][1] = index + (index & 1) + 1;
1219 						batch[i][2] = index + (~index & 1) + 1;
1220 					}
1221 					else
1222 					{
1223 						batch[i][0] = index + (index & 1);
1224 						batch[i][1] = index + (~index & 1);
1225 						batch[i][2] = index + 2;
1226 					}
1227 
1228 					index += 1;
1229 				}
1230 			}
1231 			break;
1232 		case DRAW_TRIANGLEFAN:
1233 			{
1234 				unsigned int index = start;
1235 
1236 				for(unsigned int i = 0; i < triangleCount; i++)
1237 				{
1238 					if(leadingVertexFirst)
1239 					{
1240 						batch[i][0] = index + 1;
1241 						batch[i][1] = index + 2;
1242 						batch[i][2] = 0;
1243 					}
1244 					else
1245 					{
1246 						batch[i][0] = 0;
1247 						batch[i][1] = index + 1;
1248 						batch[i][2] = index + 2;
1249 					}
1250 
1251 					index += 1;
1252 				}
1253 			}
1254 			break;
1255 		case DRAW_INDEXEDPOINTLIST8:
1256 			{
1257 				const unsigned char *index = (const unsigned char*)indices + start;
1258 
1259 				for(unsigned int i = 0; i < triangleCount; i++)
1260 				{
1261 					batch[i][0] = *index;
1262 					batch[i][1] = *index;
1263 					batch[i][2] = *index;
1264 
1265 					index += 1;
1266 				}
1267 			}
1268 			break;
1269 		case DRAW_INDEXEDPOINTLIST16:
1270 			{
1271 				const unsigned short *index = (const unsigned short*)indices + start;
1272 
1273 				for(unsigned int i = 0; i < triangleCount; i++)
1274 				{
1275 					batch[i][0] = *index;
1276 					batch[i][1] = *index;
1277 					batch[i][2] = *index;
1278 
1279 					index += 1;
1280 				}
1281 			}
1282 			break;
1283 		case DRAW_INDEXEDPOINTLIST32:
1284 			{
1285 				const unsigned int *index = (const unsigned int*)indices + start;
1286 
1287 				for(unsigned int i = 0; i < triangleCount; i++)
1288 				{
1289 					batch[i][0] = *index;
1290 					batch[i][1] = *index;
1291 					batch[i][2] = *index;
1292 
1293 					index += 1;
1294 				}
1295 			}
1296 			break;
1297 		case DRAW_INDEXEDLINELIST8:
1298 			{
1299 				const unsigned char *index = (const unsigned char*)indices + 2 * start;
1300 
1301 				for(unsigned int i = 0; i < triangleCount; i++)
1302 				{
1303 					batch[i][0] = index[0];
1304 					batch[i][1] = index[1];
1305 					batch[i][2] = index[1];
1306 
1307 					index += 2;
1308 				}
1309 			}
1310 			break;
1311 		case DRAW_INDEXEDLINELIST16:
1312 			{
1313 				const unsigned short *index = (const unsigned short*)indices + 2 * start;
1314 
1315 				for(unsigned int i = 0; i < triangleCount; i++)
1316 				{
1317 					batch[i][0] = index[0];
1318 					batch[i][1] = index[1];
1319 					batch[i][2] = index[1];
1320 
1321 					index += 2;
1322 				}
1323 			}
1324 			break;
1325 		case DRAW_INDEXEDLINELIST32:
1326 			{
1327 				const unsigned int *index = (const unsigned int*)indices + 2 * start;
1328 
1329 				for(unsigned int i = 0; i < triangleCount; i++)
1330 				{
1331 					batch[i][0] = index[0];
1332 					batch[i][1] = index[1];
1333 					batch[i][2] = index[1];
1334 
1335 					index += 2;
1336 				}
1337 			}
1338 			break;
1339 		case DRAW_INDEXEDLINESTRIP8:
1340 			{
1341 				const unsigned char *index = (const unsigned char*)indices + start;
1342 
1343 				for(unsigned int i = 0; i < triangleCount; i++)
1344 				{
1345 					batch[i][0] = index[0];
1346 					batch[i][1] = index[1];
1347 					batch[i][2] = index[1];
1348 
1349 					index += 1;
1350 				}
1351 			}
1352 			break;
1353 		case DRAW_INDEXEDLINESTRIP16:
1354 			{
1355 				const unsigned short *index = (const unsigned short*)indices + start;
1356 
1357 				for(unsigned int i = 0; i < triangleCount; i++)
1358 				{
1359 					batch[i][0] = index[0];
1360 					batch[i][1] = index[1];
1361 					batch[i][2] = index[1];
1362 
1363 					index += 1;
1364 				}
1365 			}
1366 			break;
1367 		case DRAW_INDEXEDLINESTRIP32:
1368 			{
1369 				const unsigned int *index = (const unsigned int*)indices + start;
1370 
1371 				for(unsigned int i = 0; i < triangleCount; i++)
1372 				{
1373 					batch[i][0] = index[0];
1374 					batch[i][1] = index[1];
1375 					batch[i][2] = index[1];
1376 
1377 					index += 1;
1378 				}
1379 			}
1380 			break;
1381 		case DRAW_INDEXEDLINELOOP8:
1382 			{
1383 				const unsigned char *index = (const unsigned char*)indices;
1384 
1385 				for(unsigned int i = 0; i < triangleCount; i++)
1386 				{
1387 					batch[i][0] = index[(start + i + 0) % loop];
1388 					batch[i][1] = index[(start + i + 1) % loop];
1389 					batch[i][2] = index[(start + i + 1) % loop];
1390 				}
1391 			}
1392 			break;
1393 		case DRAW_INDEXEDLINELOOP16:
1394 			{
1395 				const unsigned short *index = (const unsigned short*)indices;
1396 
1397 				for(unsigned int i = 0; i < triangleCount; i++)
1398 				{
1399 					batch[i][0] = index[(start + i + 0) % loop];
1400 					batch[i][1] = index[(start + i + 1) % loop];
1401 					batch[i][2] = index[(start + i + 1) % loop];
1402 				}
1403 			}
1404 			break;
1405 		case DRAW_INDEXEDLINELOOP32:
1406 			{
1407 				const unsigned int *index = (const unsigned int*)indices;
1408 
1409 				for(unsigned int i = 0; i < triangleCount; i++)
1410 				{
1411 					batch[i][0] = index[(start + i + 0) % loop];
1412 					batch[i][1] = index[(start + i + 1) % loop];
1413 					batch[i][2] = index[(start + i + 1) % loop];
1414 				}
1415 			}
1416 			break;
1417 		case DRAW_INDEXEDTRIANGLELIST8:
1418 			{
1419 				const unsigned char *index = (const unsigned char*)indices + 3 * start;
1420 
1421 				for(unsigned int i = 0; i < triangleCount; i++)
1422 				{
1423 					batch[i][0] = index[0];
1424 					batch[i][1] = index[1];
1425 					batch[i][2] = index[2];
1426 
1427 					index += 3;
1428 				}
1429 			}
1430 			break;
1431 		case DRAW_INDEXEDTRIANGLELIST16:
1432 			{
1433 				const unsigned short *index = (const unsigned short*)indices + 3 * start;
1434 
1435 				for(unsigned int i = 0; i < triangleCount; i++)
1436 				{
1437 					batch[i][0] = index[0];
1438 					batch[i][1] = index[1];
1439 					batch[i][2] = index[2];
1440 
1441 					index += 3;
1442 				}
1443 			}
1444 			break;
1445 		case DRAW_INDEXEDTRIANGLELIST32:
1446 			{
1447 				const unsigned int *index = (const unsigned int*)indices + 3 * start;
1448 
1449 				for(unsigned int i = 0; i < triangleCount; i++)
1450 				{
1451 					batch[i][0] = index[0];
1452 					batch[i][1] = index[1];
1453 					batch[i][2] = index[2];
1454 
1455 					index += 3;
1456 				}
1457 			}
1458 			break;
1459 		case DRAW_INDEXEDTRIANGLESTRIP8:
1460 			{
1461 				const unsigned char *index = (const unsigned char*)indices + start;
1462 
1463 				for(unsigned int i = 0; i < triangleCount; i++)
1464 				{
1465 					batch[i][0] = index[0];
1466 					batch[i][1] = index[((start + i) & 1) + 1];
1467 					batch[i][2] = index[(~(start + i) & 1) + 1];
1468 
1469 					index += 1;
1470 				}
1471 			}
1472 			break;
1473 		case DRAW_INDEXEDTRIANGLESTRIP16:
1474 			{
1475 				const unsigned short *index = (const unsigned short*)indices + start;
1476 
1477 				for(unsigned int i = 0; i < triangleCount; i++)
1478 				{
1479 					batch[i][0] = index[0];
1480 					batch[i][1] = index[((start + i) & 1) + 1];
1481 					batch[i][2] = index[(~(start + i) & 1) + 1];
1482 
1483 					index += 1;
1484 				}
1485 			}
1486 			break;
1487 		case DRAW_INDEXEDTRIANGLESTRIP32:
1488 			{
1489 				const unsigned int *index = (const unsigned int*)indices + start;
1490 
1491 				for(unsigned int i = 0; i < triangleCount; i++)
1492 				{
1493 					batch[i][0] = index[0];
1494 					batch[i][1] = index[((start + i) & 1) + 1];
1495 					batch[i][2] = index[(~(start + i) & 1) + 1];
1496 
1497 					index += 1;
1498 				}
1499 			}
1500 			break;
1501 		case DRAW_INDEXEDTRIANGLEFAN8:
1502 			{
1503 				const unsigned char *index = (const unsigned char*)indices;
1504 
1505 				for(unsigned int i = 0; i < triangleCount; i++)
1506 				{
1507 					batch[i][0] = index[start + i + 1];
1508 					batch[i][1] = index[start + i + 2];
1509 					batch[i][2] = index[0];
1510 				}
1511 			}
1512 			break;
1513 		case DRAW_INDEXEDTRIANGLEFAN16:
1514 			{
1515 				const unsigned short *index = (const unsigned short*)indices;
1516 
1517 				for(unsigned int i = 0; i < triangleCount; i++)
1518 				{
1519 					batch[i][0] = index[start + i + 1];
1520 					batch[i][1] = index[start + i + 2];
1521 					batch[i][2] = index[0];
1522 				}
1523 			}
1524 			break;
1525 		case DRAW_INDEXEDTRIANGLEFAN32:
1526 			{
1527 				const unsigned int *index = (const unsigned int*)indices;
1528 
1529 				for(unsigned int i = 0; i < triangleCount; i++)
1530 				{
1531 					batch[i][0] = index[start + i + 1];
1532 					batch[i][1] = index[start + i + 2];
1533 					batch[i][2] = index[0];
1534 				}
1535 			}
1536 			break;
1537 		case DRAW_QUADLIST:
1538 			{
1539 				unsigned int index = 4 * start / 2;
1540 
1541 				for(unsigned int i = 0; i < triangleCount; i += 2)
1542 				{
1543 					batch[i+0][0] = index + 0;
1544 					batch[i+0][1] = index + 1;
1545 					batch[i+0][2] = index + 2;
1546 
1547 					batch[i+1][0] = index + 0;
1548 					batch[i+1][1] = index + 2;
1549 					batch[i+1][2] = index + 3;
1550 
1551 					index += 4;
1552 				}
1553 			}
1554 			break;
1555 		default:
1556 			ASSERT(false);
1557 			return;
1558 		}
1559 
1560 		task->primitiveStart = start;
1561 		task->vertexCount = triangleCount * 3;
1562 		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1563 	}
1564 
setupSolidTriangles(int unit,int count)1565 	int Renderer::setupSolidTriangles(int unit, int count)
1566 	{
1567 		Triangle *triangle = triangleBatch[unit];
1568 		Primitive *primitive = primitiveBatch[unit];
1569 
1570 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1571 		SetupProcessor::State &state = draw.setupState;
1572 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1573 
1574 		int ms = state.multiSample;
1575 		int pos = state.positionRegister;
1576 		const DrawData *data = draw.data;
1577 		int visible = 0;
1578 
1579 		for(int i = 0; i < count; i++, triangle++)
1580 		{
1581 			Vertex &v0 = triangle->v0;
1582 			Vertex &v1 = triangle->v1;
1583 			Vertex &v2 = triangle->v2;
1584 
1585 			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1586 			{
1587 				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1588 
1589 				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1590 
1591 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1592 				{
1593 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1594 					{
1595 						continue;
1596 					}
1597 				}
1598 
1599 				if(setupRoutine(primitive, triangle, &polygon, data))
1600 				{
1601 					primitive += ms;
1602 					visible++;
1603 				}
1604 			}
1605 		}
1606 
1607 		return visible;
1608 	}
1609 
setupWireframeTriangle(int unit,int count)1610 	int Renderer::setupWireframeTriangle(int unit, int count)
1611 	{
1612 		Triangle *triangle = triangleBatch[unit];
1613 		Primitive *primitive = primitiveBatch[unit];
1614 		int visible = 0;
1615 
1616 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1617 		SetupProcessor::State &state = draw.setupState;
1618 
1619 		const Vertex &v0 = triangle[0].v0;
1620 		const Vertex &v1 = triangle[0].v1;
1621 		const Vertex &v2 = triangle[0].v2;
1622 
1623 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1624 
1625 		if(state.cullMode == CULL_CLOCKWISE)
1626 		{
1627 			if(d >= 0) return 0;
1628 		}
1629 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1630 		{
1631 			if(d <= 0) return 0;
1632 		}
1633 
1634 		// Copy attributes
1635 		triangle[1].v0 = v1;
1636 		triangle[1].v1 = v2;
1637 		triangle[2].v0 = v2;
1638 		triangle[2].v1 = v0;
1639 
1640 		if(state.color[0][0].flat)   // FIXME
1641 		{
1642 			for(int i = 0; i < 2; i++)
1643 			{
1644 				triangle[1].v0.C[i] = triangle[0].v0.C[i];
1645 				triangle[1].v1.C[i] = triangle[0].v0.C[i];
1646 				triangle[2].v0.C[i] = triangle[0].v0.C[i];
1647 				triangle[2].v1.C[i] = triangle[0].v0.C[i];
1648 			}
1649 		}
1650 
1651 		for(int i = 0; i < 3; i++)
1652 		{
1653 			if(setupLine(*primitive, *triangle, draw))
1654 			{
1655 				primitive->area = 0.5f * d;
1656 
1657 				primitive++;
1658 				visible++;
1659 			}
1660 
1661 			triangle++;
1662 		}
1663 
1664 		return visible;
1665 	}
1666 
setupVertexTriangle(int unit,int count)1667 	int Renderer::setupVertexTriangle(int unit, int count)
1668 	{
1669 		Triangle *triangle = triangleBatch[unit];
1670 		Primitive *primitive = primitiveBatch[unit];
1671 		int visible = 0;
1672 
1673 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1674 		SetupProcessor::State &state = draw.setupState;
1675 
1676 		const Vertex &v0 = triangle[0].v0;
1677 		const Vertex &v1 = triangle[0].v1;
1678 		const Vertex &v2 = triangle[0].v2;
1679 
1680 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1681 
1682 		if(state.cullMode == CULL_CLOCKWISE)
1683 		{
1684 			if(d >= 0) return 0;
1685 		}
1686 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1687 		{
1688 			if(d <= 0) return 0;
1689 		}
1690 
1691 		// Copy attributes
1692 		triangle[1].v0 = v1;
1693 		triangle[2].v0 = v2;
1694 
1695 		for(int i = 0; i < 3; i++)
1696 		{
1697 			if(setupPoint(*primitive, *triangle, draw))
1698 			{
1699 				primitive->area = 0.5f * d;
1700 
1701 				primitive++;
1702 				visible++;
1703 			}
1704 
1705 			triangle++;
1706 		}
1707 
1708 		return visible;
1709 	}
1710 
setupLines(int unit,int count)1711 	int Renderer::setupLines(int unit, int count)
1712 	{
1713 		Triangle *triangle = triangleBatch[unit];
1714 		Primitive *primitive = primitiveBatch[unit];
1715 		int visible = 0;
1716 
1717 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1718 		SetupProcessor::State &state = draw.setupState;
1719 
1720 		int ms = state.multiSample;
1721 
1722 		for(int i = 0; i < count; i++)
1723 		{
1724 			if(setupLine(*primitive, *triangle, draw))
1725 			{
1726 				primitive += ms;
1727 				visible++;
1728 			}
1729 
1730 			triangle++;
1731 		}
1732 
1733 		return visible;
1734 	}
1735 
setupPoints(int unit,int count)1736 	int Renderer::setupPoints(int unit, int count)
1737 	{
1738 		Triangle *triangle = triangleBatch[unit];
1739 		Primitive *primitive = primitiveBatch[unit];
1740 		int visible = 0;
1741 
1742 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1743 		SetupProcessor::State &state = draw.setupState;
1744 
1745 		int ms = state.multiSample;
1746 
1747 		for(int i = 0; i < count; i++)
1748 		{
1749 			if(setupPoint(*primitive, *triangle, draw))
1750 			{
1751 				primitive += ms;
1752 				visible++;
1753 			}
1754 
1755 			triangle++;
1756 		}
1757 
1758 		return visible;
1759 	}
1760 
setupLine(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1761 	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1762 	{
1763 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1764 		const SetupProcessor::State &state = draw.setupState;
1765 		const DrawData &data = *draw.data;
1766 
1767 		float lineWidth = data.lineWidth;
1768 
1769 		Vertex &v0 = triangle.v0;
1770 		Vertex &v1 = triangle.v1;
1771 
1772 		int pos = state.positionRegister;
1773 
1774 		const float4 &P0 = v0.v[pos];
1775 		const float4 &P1 = v1.v[pos];
1776 
1777 		if(P0.w <= 0 && P1.w <= 0)
1778 		{
1779 			return false;
1780 		}
1781 
1782 		const float W = data.Wx16[0] * (1.0f / 16.0f);
1783 		const float H = data.Hx16[0] * (1.0f / 16.0f);
1784 
1785 		float dx = W * (P1.x / P1.w - P0.x / P0.w);
1786 		float dy = H * (P1.y / P1.w - P0.y / P0.w);
1787 
1788 		if(dx == 0 && dy == 0)
1789 		{
1790 			return false;
1791 		}
1792 
1793 		if(state.multiSample > 1)
1794 		{
1795 			// Rectangle centered on the line segment
1796 
1797 			float4 P[4];
1798 			int C[4];
1799 
1800 			P[0] = P0;
1801 			P[1] = P1;
1802 			P[2] = P1;
1803 			P[3] = P0;
1804 
1805 			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1806 
1807 			dx *= scale;
1808 			dy *= scale;
1809 
1810 			float dx0h = dx * P0.w / H;
1811 			float dy0w = dy * P0.w / W;
1812 
1813 			float dx1h = dx * P1.w / H;
1814 			float dy1w = dy * P1.w / W;
1815 
1816 			P[0].x += -dy0w;
1817 			P[0].y += +dx0h;
1818 			C[0] = clipper->computeClipFlags(P[0]);
1819 
1820 			P[1].x += -dy1w;
1821 			P[1].y += +dx1h;
1822 			C[1] = clipper->computeClipFlags(P[1]);
1823 
1824 			P[2].x += +dy1w;
1825 			P[2].y += -dx1h;
1826 			C[2] = clipper->computeClipFlags(P[2]);
1827 
1828 			P[3].x += +dy0w;
1829 			P[3].y += -dx0h;
1830 			C[3] = clipper->computeClipFlags(P[3]);
1831 
1832 			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1833 			{
1834 				Polygon polygon(P, 4);
1835 
1836 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1837 
1838 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1839 				{
1840 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1841 					{
1842 						return false;
1843 					}
1844 				}
1845 
1846 				return setupRoutine(&primitive, &triangle, &polygon, &data);
1847 			}
1848 		}
1849 		else if(true)
1850 		{
1851 			// Connecting diamonds polygon
1852 			// This shape satisfies the diamond test convention, except for the exit rule part.
1853 			// Line segments with overlapping endpoints have duplicate fragments.
1854 			// The ideal algorithm requires half-open line rasterization (b/80135519).
1855 
1856 			float4 P[8];
1857 			int C[8];
1858 
1859 			P[0] = P0;
1860 			P[1] = P0;
1861 			P[2] = P0;
1862 			P[3] = P0;
1863 			P[4] = P1;
1864 			P[5] = P1;
1865 			P[6] = P1;
1866 			P[7] = P1;
1867 
1868 			float dx0 = lineWidth * 0.5f * P0.w / W;
1869 			float dy0 = lineWidth * 0.5f * P0.w / H;
1870 
1871 			float dx1 = lineWidth * 0.5f * P1.w / W;
1872 			float dy1 = lineWidth * 0.5f * P1.w / H;
1873 
1874 			P[0].x += -dx0;
1875 			C[0] = clipper->computeClipFlags(P[0]);
1876 
1877 			P[1].y += +dy0;
1878 			C[1] = clipper->computeClipFlags(P[1]);
1879 
1880 			P[2].x += +dx0;
1881 			C[2] = clipper->computeClipFlags(P[2]);
1882 
1883 			P[3].y += -dy0;
1884 			C[3] = clipper->computeClipFlags(P[3]);
1885 
1886 			P[4].x += -dx1;
1887 			C[4] = clipper->computeClipFlags(P[4]);
1888 
1889 			P[5].y += +dy1;
1890 			C[5] = clipper->computeClipFlags(P[5]);
1891 
1892 			P[6].x += +dx1;
1893 			C[6] = clipper->computeClipFlags(P[6]);
1894 
1895 			P[7].y += -dy1;
1896 			C[7] = clipper->computeClipFlags(P[7]);
1897 
1898 			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1899 			{
1900 				float4 L[6];
1901 
1902 				if(dx > -dy)
1903 				{
1904 					if(dx > dy)   // Right
1905 					{
1906 						L[0] = P[0];
1907 						L[1] = P[1];
1908 						L[2] = P[5];
1909 						L[3] = P[6];
1910 						L[4] = P[7];
1911 						L[5] = P[3];
1912 					}
1913 					else   // Down
1914 					{
1915 						L[0] = P[0];
1916 						L[1] = P[4];
1917 						L[2] = P[5];
1918 						L[3] = P[6];
1919 						L[4] = P[2];
1920 						L[5] = P[3];
1921 					}
1922 				}
1923 				else
1924 				{
1925 					if(dx > dy)   // Up
1926 					{
1927 						L[0] = P[0];
1928 						L[1] = P[1];
1929 						L[2] = P[2];
1930 						L[3] = P[6];
1931 						L[4] = P[7];
1932 						L[5] = P[4];
1933 					}
1934 					else   // Left
1935 					{
1936 						L[0] = P[1];
1937 						L[1] = P[2];
1938 						L[2] = P[3];
1939 						L[3] = P[7];
1940 						L[4] = P[4];
1941 						L[5] = P[5];
1942 					}
1943 				}
1944 
1945 				Polygon polygon(L, 6);
1946 
1947 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1948 
1949 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1950 				{
1951 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1952 					{
1953 						return false;
1954 					}
1955 				}
1956 
1957 				return setupRoutine(&primitive, &triangle, &polygon, &data);
1958 			}
1959 		}
1960 		else
1961 		{
1962 			// Parallelogram approximating Bresenham line
1963 			// This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
1964 			// duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
1965 			// requirements for Bresenham line segment rasterization.
1966 
1967 			float4 P[8];
1968 			P[0] = P0;
1969 			P[1] = P0;
1970 			P[2] = P0;
1971 			P[3] = P0;
1972 			P[4] = P1;
1973 			P[5] = P1;
1974 			P[6] = P1;
1975 			P[7] = P1;
1976 
1977 			float dx0 = lineWidth * 0.5f * P0.w / W;
1978 			float dy0 = lineWidth * 0.5f * P0.w / H;
1979 
1980 			float dx1 = lineWidth * 0.5f * P1.w / W;
1981 			float dy1 = lineWidth * 0.5f * P1.w / H;
1982 
1983 			P[0].x += -dx0;
1984 			P[1].y += +dy0;
1985 			P[2].x += +dx0;
1986 			P[3].y += -dy0;
1987 			P[4].x += -dx1;
1988 			P[5].y += +dy1;
1989 			P[6].x += +dx1;
1990 			P[7].y += -dy1;
1991 
1992 			float4 L[4];
1993 
1994 			if(dx > -dy)
1995 			{
1996 				if(dx > dy)   // Right
1997 				{
1998 					L[0] = P[1];
1999 					L[1] = P[5];
2000 					L[2] = P[7];
2001 					L[3] = P[3];
2002 				}
2003 				else   // Down
2004 				{
2005 					L[0] = P[0];
2006 					L[1] = P[4];
2007 					L[2] = P[6];
2008 					L[3] = P[2];
2009 				}
2010 			}
2011 			else
2012 			{
2013 				if(dx > dy)   // Up
2014 				{
2015 					L[0] = P[0];
2016 					L[1] = P[2];
2017 					L[2] = P[6];
2018 					L[3] = P[4];
2019 				}
2020 				else   // Left
2021 				{
2022 					L[0] = P[1];
2023 					L[1] = P[3];
2024 					L[2] = P[7];
2025 					L[3] = P[5];
2026 				}
2027 			}
2028 
2029 			int C0 = clipper->computeClipFlags(L[0]);
2030 			int C1 = clipper->computeClipFlags(L[1]);
2031 			int C2 = clipper->computeClipFlags(L[2]);
2032 			int C3 = clipper->computeClipFlags(L[3]);
2033 
2034 			if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE)
2035 			{
2036 				Polygon polygon(L, 4);
2037 
2038 				int clipFlagsOr = C0 | C1 | C2 | C3;
2039 
2040 				if(clipFlagsOr != Clipper::CLIP_FINITE)
2041 				{
2042 					if(!clipper->clip(polygon, clipFlagsOr, draw))
2043 					{
2044 						return false;
2045 					}
2046 				}
2047 
2048 				return setupRoutine(&primitive, &triangle, &polygon, &data);
2049 			}
2050 		}
2051 
2052 		return false;
2053 	}
2054 
setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)2055 	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
2056 	{
2057 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
2058 		const SetupProcessor::State &state = draw.setupState;
2059 		const DrawData &data = *draw.data;
2060 
2061 		Vertex &v = triangle.v0;
2062 
2063 		float pSize;
2064 
2065 		int pts = state.pointSizeRegister;
2066 
2067 		if(state.pointSizeRegister != Unused)
2068 		{
2069 			pSize = v.v[pts].y;
2070 		}
2071 		else
2072 		{
2073 			pSize = data.point.pointSize[0];
2074 		}
2075 
2076 		pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
2077 
2078 		float4 P[4];
2079 		int C[4];
2080 
2081 		int pos = state.positionRegister;
2082 
2083 		P[0] = v.v[pos];
2084 		P[1] = v.v[pos];
2085 		P[2] = v.v[pos];
2086 		P[3] = v.v[pos];
2087 
2088 		const float X = pSize * P[0].w * data.halfPixelX[0];
2089 		const float Y = pSize * P[0].w * data.halfPixelY[0];
2090 
2091 		P[0].x -= X;
2092 		P[0].y += Y;
2093 		C[0] = clipper->computeClipFlags(P[0]);
2094 
2095 		P[1].x += X;
2096 		P[1].y += Y;
2097 		C[1] = clipper->computeClipFlags(P[1]);
2098 
2099 		P[2].x += X;
2100 		P[2].y -= Y;
2101 		C[2] = clipper->computeClipFlags(P[2]);
2102 
2103 		P[3].x -= X;
2104 		P[3].y -= Y;
2105 		C[3] = clipper->computeClipFlags(P[3]);
2106 
2107 		Polygon polygon(P, 4);
2108 
2109 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
2110 		{
2111 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
2112 
2113 			if(clipFlagsOr != Clipper::CLIP_FINITE)
2114 			{
2115 				if(!clipper->clip(polygon, clipFlagsOr, draw))
2116 				{
2117 					return false;
2118 				}
2119 			}
2120 
2121 			triangle.v1 = triangle.v0;
2122 			triangle.v2 = triangle.v0;
2123 
2124 			triangle.v1.X += iround(16 * 0.5f * pSize);
2125 			triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
2126 			return setupRoutine(&primitive, &triangle, &polygon, &data);
2127 		}
2128 
2129 		return false;
2130 	}
2131 
initializeThreads()2132 	void Renderer::initializeThreads()
2133 	{
2134 		unitCount = ceilPow2(threadCount);
2135 		clusterCount = ceilPow2(threadCount);
2136 
2137 		for(int i = 0; i < unitCount; i++)
2138 		{
2139 			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
2140 			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
2141 		}
2142 
2143 		for(int i = 0; i < threadCount; i++)
2144 		{
2145 			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
2146 			vertexTask[i]->vertexCache.drawCall = -1;
2147 
2148 			task[i].type = Task::SUSPEND;
2149 
2150 			resume[i] = new Event();
2151 			suspend[i] = new Event();
2152 
2153 			Parameters parameters;
2154 			parameters.threadIndex = i;
2155 			parameters.renderer = this;
2156 
2157 			exitThreads = false;
2158 			worker[i] = new Thread(threadFunction, &parameters);
2159 
2160 			suspend[i]->wait();
2161 			suspend[i]->signal();
2162 		}
2163 	}
2164 
terminateThreads()2165 	void Renderer::terminateThreads()
2166 	{
2167 		while(threadsAwake != 0)
2168 		{
2169 			Thread::sleep(1);
2170 		}
2171 
2172 		for(int thread = 0; thread < threadCount; thread++)
2173 		{
2174 			if(worker[thread])
2175 			{
2176 				exitThreads = true;
2177 				resume[thread]->signal();
2178 				worker[thread]->join();
2179 
2180 				delete worker[thread];
2181 				worker[thread] = 0;
2182 				delete resume[thread];
2183 				resume[thread] = 0;
2184 				delete suspend[thread];
2185 				suspend[thread] = 0;
2186 			}
2187 
2188 			deallocate(vertexTask[thread]);
2189 			vertexTask[thread] = 0;
2190 		}
2191 
2192 		for(int i = 0; i < 16; i++)
2193 		{
2194 			deallocate(triangleBatch[i]);
2195 			triangleBatch[i] = 0;
2196 
2197 			deallocate(primitiveBatch[i]);
2198 			primitiveBatch[i] = 0;
2199 		}
2200 	}
2201 
loadConstants(const VertexShader * vertexShader)2202 	void Renderer::loadConstants(const VertexShader *vertexShader)
2203 	{
2204 		if(!vertexShader) return;
2205 
2206 		size_t count = vertexShader->getLength();
2207 
2208 		for(size_t i = 0; i < count; i++)
2209 		{
2210 			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2211 
2212 			if(instruction->opcode == Shader::OPCODE_DEF)
2213 			{
2214 				int index = instruction->dst.index;
2215 				float value[4];
2216 
2217 				value[0] = instruction->src[0].value[0];
2218 				value[1] = instruction->src[0].value[1];
2219 				value[2] = instruction->src[0].value[2];
2220 				value[3] = instruction->src[0].value[3];
2221 
2222 				setVertexShaderConstantF(index, value);
2223 			}
2224 			else if(instruction->opcode == Shader::OPCODE_DEFI)
2225 			{
2226 				int index = instruction->dst.index;
2227 				int integer[4];
2228 
2229 				integer[0] = instruction->src[0].integer[0];
2230 				integer[1] = instruction->src[0].integer[1];
2231 				integer[2] = instruction->src[0].integer[2];
2232 				integer[3] = instruction->src[0].integer[3];
2233 
2234 				setVertexShaderConstantI(index, integer);
2235 			}
2236 			else if(instruction->opcode == Shader::OPCODE_DEFB)
2237 			{
2238 				int index = instruction->dst.index;
2239 				int boolean = instruction->src[0].boolean[0];
2240 
2241 				setVertexShaderConstantB(index, &boolean);
2242 			}
2243 		}
2244 	}
2245 
loadConstants(const PixelShader * pixelShader)2246 	void Renderer::loadConstants(const PixelShader *pixelShader)
2247 	{
2248 		if(!pixelShader) return;
2249 
2250 		size_t count = pixelShader->getLength();
2251 
2252 		for(size_t i = 0; i < count; i++)
2253 		{
2254 			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2255 
2256 			if(instruction->opcode == Shader::OPCODE_DEF)
2257 			{
2258 				int index = instruction->dst.index;
2259 				float value[4];
2260 
2261 				value[0] = instruction->src[0].value[0];
2262 				value[1] = instruction->src[0].value[1];
2263 				value[2] = instruction->src[0].value[2];
2264 				value[3] = instruction->src[0].value[3];
2265 
2266 				setPixelShaderConstantF(index, value);
2267 			}
2268 			else if(instruction->opcode == Shader::OPCODE_DEFI)
2269 			{
2270 				int index = instruction->dst.index;
2271 				int integer[4];
2272 
2273 				integer[0] = instruction->src[0].integer[0];
2274 				integer[1] = instruction->src[0].integer[1];
2275 				integer[2] = instruction->src[0].integer[2];
2276 				integer[3] = instruction->src[0].integer[3];
2277 
2278 				setPixelShaderConstantI(index, integer);
2279 			}
2280 			else if(instruction->opcode == Shader::OPCODE_DEFB)
2281 			{
2282 				int index = instruction->dst.index;
2283 				int boolean = instruction->src[0].boolean[0];
2284 
2285 				setPixelShaderConstantB(index, &boolean);
2286 			}
2287 		}
2288 	}
2289 
setIndexBuffer(Resource * indexBuffer)2290 	void Renderer::setIndexBuffer(Resource *indexBuffer)
2291 	{
2292 		context->indexBuffer = indexBuffer;
2293 	}
2294 
setMultiSampleMask(unsigned int mask)2295 	void Renderer::setMultiSampleMask(unsigned int mask)
2296 	{
2297 		context->sampleMask = mask;
2298 	}
2299 
setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)2300 	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2301 	{
2302 		sw::transparencyAntialiasing = transparencyAntialiasing;
2303 	}
2304 
isReadWriteTexture(int sampler)2305 	bool Renderer::isReadWriteTexture(int sampler)
2306 	{
2307 		for(int index = 0; index < RENDERTARGETS; index++)
2308 		{
2309 			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2310 			{
2311 				return true;
2312 			}
2313 		}
2314 
2315 		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2316 		{
2317 			return true;
2318 		}
2319 
2320 		return false;
2321 	}
2322 
updateClipper()2323 	void Renderer::updateClipper()
2324 	{
2325 		if(updateClipPlanes)
2326 		{
2327 			if(VertexProcessor::isFixedFunction())   // User plane in world space
2328 			{
2329 				const Matrix &scissorWorld = getViewTransform();
2330 
2331 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2332 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2333 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2334 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2335 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2336 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2337 			}
2338 			else   // User plane in clip space
2339 			{
2340 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2341 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2342 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2343 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2344 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2345 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2346 			}
2347 
2348 			updateClipPlanes = false;
2349 		}
2350 	}
2351 
setTextureResource(unsigned int sampler,Resource * resource)2352 	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2353 	{
2354 		ASSERT(sampler < TOTAL_IMAGE_UNITS);
2355 
2356 		context->texture[sampler] = resource;
2357 	}
2358 
setTextureLevel(unsigned int sampler,unsigned int face,unsigned int level,Surface * surface,TextureType type)2359 	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2360 	{
2361 		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2362 
2363 		context->sampler[sampler].setTextureLevel(face, level, surface, type);
2364 	}
2365 
setTextureFilter(SamplerType type,int sampler,FilterType textureFilter)2366 	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2367 	{
2368 		if(type == SAMPLER_PIXEL)
2369 		{
2370 			PixelProcessor::setTextureFilter(sampler, textureFilter);
2371 		}
2372 		else
2373 		{
2374 			VertexProcessor::setTextureFilter(sampler, textureFilter);
2375 		}
2376 	}
2377 
setMipmapFilter(SamplerType type,int sampler,MipmapType mipmapFilter)2378 	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2379 	{
2380 		if(type == SAMPLER_PIXEL)
2381 		{
2382 			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2383 		}
2384 		else
2385 		{
2386 			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2387 		}
2388 	}
2389 
setGatherEnable(SamplerType type,int sampler,bool enable)2390 	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2391 	{
2392 		if(type == SAMPLER_PIXEL)
2393 		{
2394 			PixelProcessor::setGatherEnable(sampler, enable);
2395 		}
2396 		else
2397 		{
2398 			VertexProcessor::setGatherEnable(sampler, enable);
2399 		}
2400 	}
2401 
setAddressingModeU(SamplerType type,int sampler,AddressingMode addressMode)2402 	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2403 	{
2404 		if(type == SAMPLER_PIXEL)
2405 		{
2406 			PixelProcessor::setAddressingModeU(sampler, addressMode);
2407 		}
2408 		else
2409 		{
2410 			VertexProcessor::setAddressingModeU(sampler, addressMode);
2411 		}
2412 	}
2413 
setAddressingModeV(SamplerType type,int sampler,AddressingMode addressMode)2414 	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2415 	{
2416 		if(type == SAMPLER_PIXEL)
2417 		{
2418 			PixelProcessor::setAddressingModeV(sampler, addressMode);
2419 		}
2420 		else
2421 		{
2422 			VertexProcessor::setAddressingModeV(sampler, addressMode);
2423 		}
2424 	}
2425 
setAddressingModeW(SamplerType type,int sampler,AddressingMode addressMode)2426 	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2427 	{
2428 		if(type == SAMPLER_PIXEL)
2429 		{
2430 			PixelProcessor::setAddressingModeW(sampler, addressMode);
2431 		}
2432 		else
2433 		{
2434 			VertexProcessor::setAddressingModeW(sampler, addressMode);
2435 		}
2436 	}
2437 
setReadSRGB(SamplerType type,int sampler,bool sRGB)2438 	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2439 	{
2440 		if(type == SAMPLER_PIXEL)
2441 		{
2442 			PixelProcessor::setReadSRGB(sampler, sRGB);
2443 		}
2444 		else
2445 		{
2446 			VertexProcessor::setReadSRGB(sampler, sRGB);
2447 		}
2448 	}
2449 
setMipmapLOD(SamplerType type,int sampler,float bias)2450 	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2451 	{
2452 		if(type == SAMPLER_PIXEL)
2453 		{
2454 			PixelProcessor::setMipmapLOD(sampler, bias);
2455 		}
2456 		else
2457 		{
2458 			VertexProcessor::setMipmapLOD(sampler, bias);
2459 		}
2460 	}
2461 
setBorderColor(SamplerType type,int sampler,const Color<float> & borderColor)2462 	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2463 	{
2464 		if(type == SAMPLER_PIXEL)
2465 		{
2466 			PixelProcessor::setBorderColor(sampler, borderColor);
2467 		}
2468 		else
2469 		{
2470 			VertexProcessor::setBorderColor(sampler, borderColor);
2471 		}
2472 	}
2473 
setMaxAnisotropy(SamplerType type,int sampler,float maxAnisotropy)2474 	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2475 	{
2476 		if(type == SAMPLER_PIXEL)
2477 		{
2478 			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2479 		}
2480 		else
2481 		{
2482 			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2483 		}
2484 	}
2485 
setHighPrecisionFiltering(SamplerType type,int sampler,bool highPrecisionFiltering)2486 	void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
2487 	{
2488 		if(type == SAMPLER_PIXEL)
2489 		{
2490 			PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2491 		}
2492 		else
2493 		{
2494 			VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2495 		}
2496 	}
2497 
setSwizzleR(SamplerType type,int sampler,SwizzleType swizzleR)2498 	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2499 	{
2500 		if(type == SAMPLER_PIXEL)
2501 		{
2502 			PixelProcessor::setSwizzleR(sampler, swizzleR);
2503 		}
2504 		else
2505 		{
2506 			VertexProcessor::setSwizzleR(sampler, swizzleR);
2507 		}
2508 	}
2509 
setSwizzleG(SamplerType type,int sampler,SwizzleType swizzleG)2510 	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2511 	{
2512 		if(type == SAMPLER_PIXEL)
2513 		{
2514 			PixelProcessor::setSwizzleG(sampler, swizzleG);
2515 		}
2516 		else
2517 		{
2518 			VertexProcessor::setSwizzleG(sampler, swizzleG);
2519 		}
2520 	}
2521 
setSwizzleB(SamplerType type,int sampler,SwizzleType swizzleB)2522 	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2523 	{
2524 		if(type == SAMPLER_PIXEL)
2525 		{
2526 			PixelProcessor::setSwizzleB(sampler, swizzleB);
2527 		}
2528 		else
2529 		{
2530 			VertexProcessor::setSwizzleB(sampler, swizzleB);
2531 		}
2532 	}
2533 
setSwizzleA(SamplerType type,int sampler,SwizzleType swizzleA)2534 	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2535 	{
2536 		if(type == SAMPLER_PIXEL)
2537 		{
2538 			PixelProcessor::setSwizzleA(sampler, swizzleA);
2539 		}
2540 		else
2541 		{
2542 			VertexProcessor::setSwizzleA(sampler, swizzleA);
2543 		}
2544 	}
2545 
setCompareFunc(SamplerType type,int sampler,CompareFunc compFunc)2546 	void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc)
2547 	{
2548 		if(type == SAMPLER_PIXEL)
2549 		{
2550 			PixelProcessor::setCompareFunc(sampler, compFunc);
2551 		}
2552 		else
2553 		{
2554 			VertexProcessor::setCompareFunc(sampler, compFunc);
2555 		}
2556 	}
2557 
setBaseLevel(SamplerType type,int sampler,int baseLevel)2558 	void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2559 	{
2560 		if(type == SAMPLER_PIXEL)
2561 		{
2562 			PixelProcessor::setBaseLevel(sampler, baseLevel);
2563 		}
2564 		else
2565 		{
2566 			VertexProcessor::setBaseLevel(sampler, baseLevel);
2567 		}
2568 	}
2569 
setMaxLevel(SamplerType type,int sampler,int maxLevel)2570 	void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2571 	{
2572 		if(type == SAMPLER_PIXEL)
2573 		{
2574 			PixelProcessor::setMaxLevel(sampler, maxLevel);
2575 		}
2576 		else
2577 		{
2578 			VertexProcessor::setMaxLevel(sampler, maxLevel);
2579 		}
2580 	}
2581 
setMinLod(SamplerType type,int sampler,float minLod)2582 	void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2583 	{
2584 		if(type == SAMPLER_PIXEL)
2585 		{
2586 			PixelProcessor::setMinLod(sampler, minLod);
2587 		}
2588 		else
2589 		{
2590 			VertexProcessor::setMinLod(sampler, minLod);
2591 		}
2592 	}
2593 
setMaxLod(SamplerType type,int sampler,float maxLod)2594 	void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2595 	{
2596 		if(type == SAMPLER_PIXEL)
2597 		{
2598 			PixelProcessor::setMaxLod(sampler, maxLod);
2599 		}
2600 		else
2601 		{
2602 			VertexProcessor::setMaxLod(sampler, maxLod);
2603 		}
2604 	}
2605 
setSyncRequired(SamplerType type,int sampler,bool syncRequired)2606 	void Renderer::setSyncRequired(SamplerType type, int sampler, bool syncRequired)
2607 	{
2608 		if(type == SAMPLER_PIXEL)
2609 		{
2610 			PixelProcessor::setSyncRequired(sampler, syncRequired);
2611 		}
2612 		else
2613 		{
2614 			VertexProcessor::setSyncRequired(sampler, syncRequired);
2615 		}
2616 	}
2617 
setPointSpriteEnable(bool pointSpriteEnable)2618 	void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2619 	{
2620 		context->setPointSpriteEnable(pointSpriteEnable);
2621 	}
2622 
setPointScaleEnable(bool pointScaleEnable)2623 	void Renderer::setPointScaleEnable(bool pointScaleEnable)
2624 	{
2625 		context->setPointScaleEnable(pointScaleEnable);
2626 	}
2627 
setLineWidth(float width)2628 	void Renderer::setLineWidth(float width)
2629 	{
2630 		context->lineWidth = width;
2631 	}
2632 
setDepthBias(float bias)2633 	void Renderer::setDepthBias(float bias)
2634 	{
2635 		context->depthBias = bias;
2636 	}
2637 
setSlopeDepthBias(float slopeBias)2638 	void Renderer::setSlopeDepthBias(float slopeBias)
2639 	{
2640 		context->slopeDepthBias = slopeBias;
2641 	}
2642 
setRasterizerDiscard(bool rasterizerDiscard)2643 	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2644 	{
2645 		context->rasterizerDiscard = rasterizerDiscard;
2646 	}
2647 
setPixelShader(const PixelShader * shader)2648 	void Renderer::setPixelShader(const PixelShader *shader)
2649 	{
2650 		context->pixelShader = shader;
2651 
2652 		loadConstants(shader);
2653 	}
2654 
setVertexShader(const VertexShader * shader)2655 	void Renderer::setVertexShader(const VertexShader *shader)
2656 	{
2657 		context->vertexShader = shader;
2658 
2659 		loadConstants(shader);
2660 	}
2661 
setPixelShaderConstantF(unsigned int index,const float value[4],unsigned int count)2662 	void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2663 	{
2664 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2665 		{
2666 			if(drawCall[i]->psDirtyConstF < index + count)
2667 			{
2668 				drawCall[i]->psDirtyConstF = index + count;
2669 			}
2670 		}
2671 
2672 		for(unsigned int i = 0; i < count; i++)
2673 		{
2674 			PixelProcessor::setFloatConstant(index + i, value);
2675 			value += 4;
2676 		}
2677 	}
2678 
setPixelShaderConstantI(unsigned int index,const int value[4],unsigned int count)2679 	void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2680 	{
2681 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2682 		{
2683 			if(drawCall[i]->psDirtyConstI < index + count)
2684 			{
2685 				drawCall[i]->psDirtyConstI = index + count;
2686 			}
2687 		}
2688 
2689 		for(unsigned int i = 0; i < count; i++)
2690 		{
2691 			PixelProcessor::setIntegerConstant(index + i, value);
2692 			value += 4;
2693 		}
2694 	}
2695 
setPixelShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2696 	void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2697 	{
2698 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2699 		{
2700 			if(drawCall[i]->psDirtyConstB < index + count)
2701 			{
2702 				drawCall[i]->psDirtyConstB = index + count;
2703 			}
2704 		}
2705 
2706 		for(unsigned int i = 0; i < count; i++)
2707 		{
2708 			PixelProcessor::setBooleanConstant(index + i, *boolean);
2709 			boolean++;
2710 		}
2711 	}
2712 
setVertexShaderConstantF(unsigned int index,const float value[4],unsigned int count)2713 	void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2714 	{
2715 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2716 		{
2717 			if(drawCall[i]->vsDirtyConstF < index + count)
2718 			{
2719 				drawCall[i]->vsDirtyConstF = index + count;
2720 			}
2721 		}
2722 
2723 		for(unsigned int i = 0; i < count; i++)
2724 		{
2725 			VertexProcessor::setFloatConstant(index + i, value);
2726 			value += 4;
2727 		}
2728 	}
2729 
setVertexShaderConstantI(unsigned int index,const int value[4],unsigned int count)2730 	void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2731 	{
2732 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2733 		{
2734 			if(drawCall[i]->vsDirtyConstI < index + count)
2735 			{
2736 				drawCall[i]->vsDirtyConstI = index + count;
2737 			}
2738 		}
2739 
2740 		for(unsigned int i = 0; i < count; i++)
2741 		{
2742 			VertexProcessor::setIntegerConstant(index + i, value);
2743 			value += 4;
2744 		}
2745 	}
2746 
setVertexShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2747 	void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2748 	{
2749 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2750 		{
2751 			if(drawCall[i]->vsDirtyConstB < index + count)
2752 			{
2753 				drawCall[i]->vsDirtyConstB = index + count;
2754 			}
2755 		}
2756 
2757 		for(unsigned int i = 0; i < count; i++)
2758 		{
2759 			VertexProcessor::setBooleanConstant(index + i, *boolean);
2760 			boolean++;
2761 		}
2762 	}
2763 
setModelMatrix(const Matrix & M,int i)2764 	void Renderer::setModelMatrix(const Matrix &M, int i)
2765 	{
2766 		VertexProcessor::setModelMatrix(M, i);
2767 	}
2768 
setViewMatrix(const Matrix & V)2769 	void Renderer::setViewMatrix(const Matrix &V)
2770 	{
2771 		VertexProcessor::setViewMatrix(V);
2772 		updateClipPlanes = true;
2773 	}
2774 
setBaseMatrix(const Matrix & B)2775 	void Renderer::setBaseMatrix(const Matrix &B)
2776 	{
2777 		VertexProcessor::setBaseMatrix(B);
2778 		updateClipPlanes = true;
2779 	}
2780 
setProjectionMatrix(const Matrix & P)2781 	void Renderer::setProjectionMatrix(const Matrix &P)
2782 	{
2783 		VertexProcessor::setProjectionMatrix(P);
2784 		updateClipPlanes = true;
2785 	}
2786 
addQuery(Query * query)2787 	void Renderer::addQuery(Query *query)
2788 	{
2789 		queries.push_back(query);
2790 	}
2791 
removeQuery(Query * query)2792 	void Renderer::removeQuery(Query *query)
2793 	{
2794 		queries.remove(query);
2795 	}
2796 
2797 	#if PERF_HUD
getThreadCount()2798 		int Renderer::getThreadCount()
2799 		{
2800 			return threadCount;
2801 		}
2802 
getVertexTime(int thread)2803 		int64_t Renderer::getVertexTime(int thread)
2804 		{
2805 			return vertexTime[thread];
2806 		}
2807 
getSetupTime(int thread)2808 		int64_t Renderer::getSetupTime(int thread)
2809 		{
2810 			return setupTime[thread];
2811 		}
2812 
getPixelTime(int thread)2813 		int64_t Renderer::getPixelTime(int thread)
2814 		{
2815 			return pixelTime[thread];
2816 		}
2817 
resetTimers()2818 		void Renderer::resetTimers()
2819 		{
2820 			for(int thread = 0; thread < threadCount; thread++)
2821 			{
2822 				vertexTime[thread] = 0;
2823 				setupTime[thread] = 0;
2824 				pixelTime[thread] = 0;
2825 			}
2826 		}
2827 	#endif
2828 
setViewport(const Viewport & viewport)2829 	void Renderer::setViewport(const Viewport &viewport)
2830 	{
2831 		this->viewport = viewport;
2832 	}
2833 
setScissor(const Rect & scissor)2834 	void Renderer::setScissor(const Rect &scissor)
2835 	{
2836 		this->scissor = scissor;
2837 	}
2838 
setClipFlags(int flags)2839 	void Renderer::setClipFlags(int flags)
2840 	{
2841 		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2842 	}
2843 
setClipPlane(unsigned int index,const float plane[4])2844 	void Renderer::setClipPlane(unsigned int index, const float plane[4])
2845 	{
2846 		if(index < MAX_CLIP_PLANES)
2847 		{
2848 			userPlane[index] = plane;
2849 		}
2850 		else ASSERT(false);
2851 
2852 		updateClipPlanes = true;
2853 	}
2854 
updateConfiguration(bool initialUpdate)2855 	void Renderer::updateConfiguration(bool initialUpdate)
2856 	{
2857 		bool newConfiguration = swiftConfig->hasNewConfiguration();
2858 
2859 		if(newConfiguration || initialUpdate)
2860 		{
2861 			terminateThreads();
2862 
2863 			SwiftConfig::Configuration configuration = {};
2864 			swiftConfig->getConfiguration(configuration);
2865 
2866 			precacheVertex = !newConfiguration && configuration.precache;
2867 			precacheSetup = !newConfiguration && configuration.precache;
2868 			precachePixel = !newConfiguration && configuration.precache;
2869 
2870 			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2871 			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2872 			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2873 
2874 			switch(configuration.textureSampleQuality)
2875 			{
2876 			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2877 			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2878 			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2879 			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2880 			}
2881 
2882 			switch(configuration.mipmapQuality)
2883 			{
2884 			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2885 			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2886 			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2887 			}
2888 
2889 			setPerspectiveCorrection(configuration.perspectiveCorrection);
2890 
2891 			switch(configuration.transcendentalPrecision)
2892 			{
2893 			case 0:
2894 				logPrecision = APPROXIMATE;
2895 				expPrecision = APPROXIMATE;
2896 				rcpPrecision = APPROXIMATE;
2897 				rsqPrecision = APPROXIMATE;
2898 				break;
2899 			case 1:
2900 				logPrecision = PARTIAL;
2901 				expPrecision = PARTIAL;
2902 				rcpPrecision = PARTIAL;
2903 				rsqPrecision = PARTIAL;
2904 				break;
2905 			case 2:
2906 				logPrecision = ACCURATE;
2907 				expPrecision = ACCURATE;
2908 				rcpPrecision = ACCURATE;
2909 				rsqPrecision = ACCURATE;
2910 				break;
2911 			case 3:
2912 				logPrecision = WHQL;
2913 				expPrecision = WHQL;
2914 				rcpPrecision = WHQL;
2915 				rsqPrecision = WHQL;
2916 				break;
2917 			case 4:
2918 				logPrecision = IEEE;
2919 				expPrecision = IEEE;
2920 				rcpPrecision = IEEE;
2921 				rsqPrecision = IEEE;
2922 				break;
2923 			default:
2924 				logPrecision = ACCURATE;
2925 				expPrecision = ACCURATE;
2926 				rcpPrecision = ACCURATE;
2927 				rsqPrecision = ACCURATE;
2928 				break;
2929 			}
2930 
2931 			switch(configuration.transparencyAntialiasing)
2932 			{
2933 			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2934 			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2935 			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2936 			}
2937 
2938 			switch(configuration.threadCount)
2939 			{
2940 			case -1: threadCount = CPUID::coreCount();        break;
2941 			case 0:  threadCount = CPUID::processAffinity();  break;
2942 			default: threadCount = configuration.threadCount; break;
2943 			}
2944 
2945 			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2946 			CPUID::setEnableSSSE3(configuration.enableSSSE3);
2947 			CPUID::setEnableSSE3(configuration.enableSSE3);
2948 			CPUID::setEnableSSE2(configuration.enableSSE2);
2949 			CPUID::setEnableSSE(configuration.enableSSE);
2950 
2951 			rr::Config::Edit cfg;
2952 			cfg.clearOptimizationPasses();
2953 			for(auto pass : configuration.optimization)
2954 			{
2955 				if (pass != rr::Optimization::Pass::Disabled) { cfg.add(pass); }
2956 			}
2957 			rr::Nucleus::adjustDefaultConfig(cfg);
2958 
2959 			forceWindowed = configuration.forceWindowed;
2960 			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2961 			postBlendSRGB = configuration.postBlendSRGB;
2962 			exactColorRounding = configuration.exactColorRounding;
2963 			forceClearRegisters = configuration.forceClearRegisters;
2964 
2965 		#ifndef NDEBUG
2966 			minPrimitives = configuration.minPrimitives;
2967 			maxPrimitives = configuration.maxPrimitives;
2968 		#endif
2969 		}
2970 
2971 		if(!initialUpdate && !worker[0])
2972 		{
2973 			initializeThreads();
2974 		}
2975 	}
2976 }
2977