1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Renderer.hpp"
16 
17 #include "Clipper.hpp"
18 #include "Math.hpp"
19 #include "FrameBuffer.hpp"
20 #include "Timer.hpp"
21 #include "Surface.hpp"
22 #include "Half.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
27 #include "CPUID.hpp"
28 #include "Memory.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
31 #include "Debug.hpp"
32 #include "Reactor/Reactor.hpp"
33 
34 #undef max
35 
// Forwarded to the SwiftConfig constructor when a Renderer is created
// (presumably suppresses SwiftConfig's configuration server - confirm in SwiftConfig).
bool disableServer = true;

#ifndef NDEBUG
// Debug-only filter: Renderer::draw() returns immediately for any draw call
// whose primitive count lies outside [minPrimitives, maxPrimitives].
unsigned int minPrimitives = 1u;
unsigned int maxPrimitives = 1u << 21;
#endif
42 
43 namespace sw
44 {
45 	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46 	extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47 	extern bool booleanFaceRegister;
48 	extern bool fullPixelPositionRegister;
49 	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
50 	extern bool secondaryColor;             // Specular lighting is applied after texturing
51 
52 	extern bool forceWindowed;
53 	extern bool complementaryDepthBuffer;
54 	extern bool postBlendSRGB;
55 	extern bool exactColorRounding;
56 	extern TransparencyAntialiasing transparencyAntialiasing;
57 	extern bool forceClearRegisters;
58 
59 	extern bool precacheVertex;
60 	extern bool precacheSetup;
61 	extern bool precachePixel;
62 
63 	int batchSize = 128;
64 	int threadCount = 1;
65 	int unitCount = 1;
66 	int clusterCount = 1;
67 
68 	TranscendentalPrecision logPrecision = ACCURATE;
69 	TranscendentalPrecision expPrecision = ACCURATE;
70 	TranscendentalPrecision rcpPrecision = ACCURATE;
71 	TranscendentalPrecision rsqPrecision = ACCURATE;
72 	bool perspectiveCorrection = true;
73 
74 	struct Parameters
75 	{
76 		Renderer *renderer;
77 		int threadIndex;
78 	};
79 
DrawCall()80 	DrawCall::DrawCall()
81 	{
82 		queries = 0;
83 
84 		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
85 		vsDirtyConstI = 16;
86 		vsDirtyConstB = 16;
87 
88 		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
89 		psDirtyConstI = 16;
90 		psDirtyConstB = 16;
91 
92 		references = -1;
93 
94 		data = (DrawData*)allocate(sizeof(DrawData));
95 		data->constants = &constants;
96 	}
97 
~DrawCall()98 	DrawCall::~DrawCall()
99 	{
100 		delete queries;
101 
102 		deallocate(data);
103 	}
104 
Renderer(Context * context,Conventions conventions,bool exactColorRounding)105 	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
106 	{
107 		sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
108 		sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
109 		sw::booleanFaceRegister = conventions.booleanFaceRegister;
110 		sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
111 		sw::leadingVertexFirst = conventions.leadingVertexFirst;
112 		sw::secondaryColor = conventions.secondaryColor;
113 		sw::exactColorRounding = exactColorRounding;
114 
115 		setRenderTarget(0, 0);
116 		clipper = new Clipper(symmetricNormalizedDepth);
117 
118 		updateViewMatrix = true;
119 		updateBaseMatrix = true;
120 		updateProjectionMatrix = true;
121 		updateClipPlanes = true;
122 
123 		#if PERF_HUD
124 			resetTimers();
125 		#endif
126 
127 		for(int i = 0; i < 16; i++)
128 		{
129 			vertexTask[i] = 0;
130 
131 			worker[i] = 0;
132 			resume[i] = 0;
133 			suspend[i] = 0;
134 		}
135 
136 		threadsAwake = 0;
137 		resumeApp = new Event();
138 
139 		currentDraw = 0;
140 		nextDraw = 0;
141 
142 		qHead = 0;
143 		qSize = 0;
144 
145 		for(int i = 0; i < 16; i++)
146 		{
147 			triangleBatch[i] = 0;
148 			primitiveBatch[i] = 0;
149 		}
150 
151 		for(int draw = 0; draw < DRAW_COUNT; draw++)
152 		{
153 			drawCall[draw] = new DrawCall();
154 			drawList[draw] = drawCall[draw];
155 		}
156 
157 		for(int unit = 0; unit < 16; unit++)
158 		{
159 			primitiveProgress[unit].init();
160 		}
161 
162 		for(int cluster = 0; cluster < 16; cluster++)
163 		{
164 			pixelProgress[cluster].init();
165 		}
166 
167 		clipFlags = 0;
168 
169 		swiftConfig = new SwiftConfig(disableServer);
170 		updateConfiguration(true);
171 
172 		sync = new Resource(0);
173 	}
174 
~Renderer()175 	Renderer::~Renderer()
176 	{
177 		sync->destruct();
178 
179 		delete clipper;
180 		clipper = 0;
181 
182 		terminateThreads();
183 		delete resumeApp;
184 
185 		for(int draw = 0; draw < DRAW_COUNT; draw++)
186 		{
187 			delete drawCall[draw];
188 		}
189 
190 		delete swiftConfig;
191 	}
192 
clear(void * pixel,Format format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)193 	void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
194 	{
195 		blitter.clear(pixel, format, dest, dRect, rgbaMask);
196 	}
197 
blit(Surface * source,const SliceRect & sRect,Surface * dest,const SliceRect & dRect,bool filter)198 	void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
199 	{
200 		blitter.blit(source, sRect, dest, dRect, filter);
201 	}
202 
blit3D(Surface * source,Surface * dest)203 	void Renderer::blit3D(Surface *source, Surface *dest)
204 	{
205 		blitter.blit3D(source, dest);
206 	}
207 
draw(DrawType drawType,unsigned int indexOffset,unsigned int count,bool update)208 	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
209 	{
210 		#ifndef NDEBUG
211 			if(count < minPrimitives || count > maxPrimitives)
212 			{
213 				return;
214 			}
215 		#endif
216 
217 		context->drawType = drawType;
218 
219 		updateConfiguration();
220 		updateClipper();
221 
222 		int ss = context->getSuperSampleCount();
223 		int ms = context->getMultiSampleCount();
224 
225 		for(int q = 0; q < ss; q++)
226 		{
227 			unsigned int oldMultiSampleMask = context->multiSampleMask;
228 			context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
229 
230 			if(!context->multiSampleMask)
231 			{
232 				continue;
233 			}
234 
235 			sync->lock(sw::PRIVATE);
236 
237 			Routine *vertexRoutine;
238 			Routine *setupRoutine;
239 			Routine *pixelRoutine;
240 
241 			if(update || oldMultiSampleMask != context->multiSampleMask)
242 			{
243 				vertexState = VertexProcessor::update(drawType);
244 				setupState = SetupProcessor::update();
245 				pixelState = PixelProcessor::update();
246 
247 				vertexRoutine = VertexProcessor::routine(vertexState);
248 				setupRoutine = SetupProcessor::routine(setupState);
249 				pixelRoutine = PixelProcessor::routine(pixelState);
250 			}
251 
252 			int batch = batchSize / ms;
253 
254 			int (Renderer::*setupPrimitives)(int batch, int count);
255 
256 			if(context->isDrawTriangle())
257 			{
258 				switch(context->fillMode)
259 				{
260 				case FILL_SOLID:
261 					setupPrimitives = &Renderer::setupSolidTriangles;
262 					break;
263 				case FILL_WIREFRAME:
264 					setupPrimitives = &Renderer::setupWireframeTriangle;
265 					batch = 1;
266 					break;
267 				case FILL_VERTEX:
268 					setupPrimitives = &Renderer::setupVertexTriangle;
269 					batch = 1;
270 					break;
271 				default: ASSERT(false);
272 				}
273 			}
274 			else if(context->isDrawLine())
275 			{
276 				setupPrimitives = &Renderer::setupLines;
277 			}
278 			else   // Point draw
279 			{
280 				setupPrimitives = &Renderer::setupPoints;
281 			}
282 
283 			DrawCall *draw = 0;
284 
285 			do
286 			{
287 				for(int i = 0; i < DRAW_COUNT; i++)
288 				{
289 					if(drawCall[i]->references == -1)
290 					{
291 						draw = drawCall[i];
292 						drawList[nextDraw % DRAW_COUNT] = draw;
293 
294 						break;
295 					}
296 				}
297 
298 				if(!draw)
299 				{
300 					resumeApp->wait();
301 				}
302 			}
303 			while(!draw);
304 
305 			DrawData *data = draw->data;
306 
307 			if(queries.size() != 0)
308 			{
309 				draw->queries = new std::list<Query*>();
310 				bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
311 				for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
312 				{
313 					Query* q = *query;
314 					if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
315 					{
316 						atomicIncrement(&(q->reference));
317 						draw->queries->push_back(q);
318 					}
319 				}
320 			}
321 
322 			draw->drawType = drawType;
323 			draw->batchSize = batch;
324 
325 			vertexRoutine->bind();
326 			setupRoutine->bind();
327 			pixelRoutine->bind();
328 
329 			draw->vertexRoutine = vertexRoutine;
330 			draw->setupRoutine = setupRoutine;
331 			draw->pixelRoutine = pixelRoutine;
332 			draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
333 			draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
334 			draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
335 			draw->setupPrimitives = setupPrimitives;
336 			draw->setupState = setupState;
337 
338 			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
339 			{
340 				draw->vertexStream[i] = context->input[i].resource;
341 				data->input[i] = context->input[i].buffer;
342 				data->stride[i] = context->input[i].stride;
343 
344 				if(draw->vertexStream[i])
345 				{
346 					draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
347 				}
348 			}
349 
350 			if(context->indexBuffer)
351 			{
352 				data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
353 			}
354 
355 			draw->indexBuffer = context->indexBuffer;
356 
357 			for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
358 			{
359 				draw->texture[sampler] = 0;
360 			}
361 
362 			for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
363 			{
364 				if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
365 				{
366 					draw->texture[sampler] = context->texture[sampler];
367 					draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
368 
369 					data->mipmap[sampler] = context->sampler[sampler].getTextureData();
370 				}
371 			}
372 
373 			if(context->pixelShader)
374 			{
375 				if(draw->psDirtyConstF)
376 				{
377 					memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
378 					memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
379 					draw->psDirtyConstF = 0;
380 				}
381 
382 				if(draw->psDirtyConstI)
383 				{
384 					memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
385 					draw->psDirtyConstI = 0;
386 				}
387 
388 				if(draw->psDirtyConstB)
389 				{
390 					memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
391 					draw->psDirtyConstB = 0;
392 				}
393 
394 				PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
395 			}
396 			else
397 			{
398 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
399 				{
400 					draw->pUniformBuffers[i] = nullptr;
401 				}
402 			}
403 
404 			if(context->pixelShaderVersion() <= 0x0104)
405 			{
406 				for(int stage = 0; stage < 8; stage++)
407 				{
408 					if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
409 					{
410 						data->textureStage[stage] = context->textureStage[stage].uniforms;
411 					}
412 					else break;
413 				}
414 			}
415 
416 			if(context->vertexShader)
417 			{
418 				if(context->vertexShader->getVersion() >= 0x0300)
419 				{
420 					for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
421 					{
422 						if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
423 						{
424 							draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
425 							draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
426 
427 							data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
428 						}
429 					}
430 				}
431 
432 				if(draw->vsDirtyConstF)
433 				{
434 					memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
435 					draw->vsDirtyConstF = 0;
436 				}
437 
438 				if(draw->vsDirtyConstI)
439 				{
440 					memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
441 					draw->vsDirtyConstI = 0;
442 				}
443 
444 				if(draw->vsDirtyConstB)
445 				{
446 					memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
447 					draw->vsDirtyConstB = 0;
448 				}
449 
450 				if(context->vertexShader->instanceIdDeclared)
451 				{
452 					data->instanceID = context->instanceID;
453 				}
454 
455 				VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
456 				VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
457 			}
458 			else
459 			{
460 				data->ff = ff;
461 
462 				draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
463 				draw->vsDirtyConstI = 16;
464 				draw->vsDirtyConstB = 16;
465 
466 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
467 				{
468 					draw->vUniformBuffers[i] = nullptr;
469 				}
470 
471 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
472 				{
473 					draw->transformFeedbackBuffers[i] = nullptr;
474 				}
475 			}
476 
477 			if(pixelState.stencilActive)
478 			{
479 				data->stencil[0] = stencil;
480 				data->stencil[1] = stencilCCW;
481 			}
482 
483 			if(pixelState.fogActive)
484 			{
485 				data->fog = fog;
486 			}
487 
488 			if(setupState.isDrawPoint)
489 			{
490 				data->point = point;
491 			}
492 
493 			data->lineWidth = context->lineWidth;
494 
495 			data->factor = factor;
496 
497 			if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
498 			{
499 				float ref = context->alphaReference * (1.0f / 255.0f);
500 				float margin = sw::min(ref, 1.0f - ref);
501 
502 				if(ms == 4)
503 				{
504 					data->a2c0 = replicate(ref - margin * 0.6f);
505 					data->a2c1 = replicate(ref - margin * 0.2f);
506 					data->a2c2 = replicate(ref + margin * 0.2f);
507 					data->a2c3 = replicate(ref + margin * 0.6f);
508 				}
509 				else if(ms == 2)
510 				{
511 					data->a2c0 = replicate(ref - margin * 0.3f);
512 					data->a2c1 = replicate(ref + margin * 0.3f);
513 				}
514 				else ASSERT(false);
515 			}
516 
517 			if(pixelState.occlusionEnabled)
518 			{
519 				for(int cluster = 0; cluster < clusterCount; cluster++)
520 				{
521 					data->occlusion[cluster] = 0;
522 				}
523 			}
524 
525 			#if PERF_PROFILE
526 				for(int cluster = 0; cluster < clusterCount; cluster++)
527 				{
528 					for(int i = 0; i < PERF_TIMERS; i++)
529 					{
530 						data->cycles[i][cluster] = 0;
531 					}
532 				}
533 			#endif
534 
535 			// Viewport
536 			{
537 				float W = 0.5f * viewport.width;
538 				float H = 0.5f * viewport.height;
539 				float X0 = viewport.x0 + W;
540 				float Y0 = viewport.y0 + H;
541 				float N = viewport.minZ;
542 				float F = viewport.maxZ;
543 				float Z = F - N;
544 
545 				if(context->isDrawTriangle(false))
546 				{
547 					N += depthBias;
548 				}
549 
550 				if(complementaryDepthBuffer)
551 				{
552 					Z = -Z;
553 					N = 1 - N;
554 				}
555 
556 				static const float X[5][16] =   // Fragment offsets
557 				{
558 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
559 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
560 					{-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
561 					{+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
562 					{+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
563 				};
564 
565 				static const float Y[5][16] =   // Fragment offsets
566 				{
567 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
568 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
569 					{-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
570 					{-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
571 					{-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
572 				};
573 
574 				int s = sw::log2(ss);
575 
576 				data->Wx16 = replicate(W * 16);
577 				data->Hx16 = replicate(H * 16);
578 				data->X0x16 = replicate(X0 * 16 - 8);
579 				data->Y0x16 = replicate(Y0 * 16 - 8);
580 				data->XXXX = replicate(X[s][q] / W);
581 				data->YYYY = replicate(Y[s][q] / H);
582 				data->halfPixelX = replicate(0.5f / W);
583 				data->halfPixelY = replicate(0.5f / H);
584 				data->viewportHeight = abs(viewport.height);
585 				data->slopeDepthBias = slopeDepthBias;
586 				data->depthRange = Z;
587 				data->depthNear = N;
588 				draw->clipFlags = clipFlags;
589 
590 				if(clipFlags)
591 				{
592 					if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
593 					if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
594 					if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
595 					if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
596 					if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
597 					if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
598 				}
599 			}
600 
601 			// Target
602 			{
603 				for(int index = 0; index < RENDERTARGETS; index++)
604 				{
605 					draw->renderTarget[index] = context->renderTarget[index];
606 
607 					if(draw->renderTarget[index])
608 					{
609 						data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
610 						data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
611 						data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
612 					}
613 				}
614 
615 				draw->depthBuffer = context->depthBuffer;
616 				draw->stencilBuffer = context->stencilBuffer;
617 
618 				if(draw->depthBuffer)
619 				{
620 					data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
621 					data->depthPitchB = context->depthBuffer->getInternalPitchB();
622 					data->depthSliceB = context->depthBuffer->getInternalSliceB();
623 				}
624 
625 				if(draw->stencilBuffer)
626 				{
627 					data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(q * ms, MANAGED);
628 					data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
629 					data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
630 				}
631 			}
632 
633 			// Scissor
634 			{
635 				data->scissorX0 = scissor.x0;
636 				data->scissorX1 = scissor.x1;
637 				data->scissorY0 = scissor.y0;
638 				data->scissorY1 = scissor.y1;
639 			}
640 
641 			draw->primitive = 0;
642 			draw->count = count;
643 
644 			draw->references = (count + batch - 1) / batch;
645 
646 			schedulerMutex.lock();
647 			nextDraw++;
648 			schedulerMutex.unlock();
649 
650 			if(threadCount > 1)
651 			{
652 				if(!threadsAwake)
653 				{
654 					suspend[0]->wait();
655 
656 					threadsAwake = 1;
657 					task[0].type = Task::RESUME;
658 
659 					resume[0]->signal();
660 				}
661 			}
662 			else   // Use main thread for draw execution
663 			{
664 				threadsAwake = 1;
665 				task[0].type = Task::RESUME;
666 
667 				taskLoop(0);
668 			}
669 		}
670 	}
671 
threadFunction(void * parameters)672 	void Renderer::threadFunction(void *parameters)
673 	{
674 		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
675 		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
676 
677 		if(logPrecision < IEEE)
678 		{
679 			CPUID::setFlushToZero(true);
680 			CPUID::setDenormalsAreZero(true);
681 		}
682 
683 		renderer->threadLoop(threadIndex);
684 	}
685 
threadLoop(int threadIndex)686 	void Renderer::threadLoop(int threadIndex)
687 	{
688 		while(!exitThreads)
689 		{
690 			taskLoop(threadIndex);
691 
692 			suspend[threadIndex]->signal();
693 			resume[threadIndex]->wait();
694 		}
695 	}
696 
taskLoop(int threadIndex)697 	void Renderer::taskLoop(int threadIndex)
698 	{
699 		while(task[threadIndex].type != Task::SUSPEND)
700 		{
701 			scheduleTask(threadIndex);
702 			executeTask(threadIndex);
703 		}
704 	}
705 
findAvailableTasks()706 	void Renderer::findAvailableTasks()
707 	{
708 		// Find pixel tasks
709 		for(int cluster = 0; cluster < clusterCount; cluster++)
710 		{
711 			if(!pixelProgress[cluster].executing)
712 			{
713 				for(int unit = 0; unit < unitCount; unit++)
714 				{
715 					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
716 					{
717 						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
718 						{
719 							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
720 							{
721 								Task &task = taskQueue[qHead];
722 								task.type = Task::PIXELS;
723 								task.primitiveUnit = unit;
724 								task.pixelCluster = cluster;
725 
726 								pixelProgress[cluster].executing = true;
727 
728 								// Commit to the task queue
729 								qHead = (qHead + 1) % 32;
730 								qSize++;
731 
732 								break;
733 							}
734 						}
735 					}
736 				}
737 			}
738 		}
739 
740 		// Find primitive tasks
741 		if(currentDraw == nextDraw)
742 		{
743 			return;   // No more primitives to process
744 		}
745 
746 		for(int unit = 0; unit < unitCount; unit++)
747 		{
748 			DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
749 
750 			if(draw->primitive >= draw->count)
751 			{
752 				currentDraw++;
753 
754 				if(currentDraw == nextDraw)
755 				{
756 					return;   // No more primitives to process
757 				}
758 
759 				draw = drawList[currentDraw % DRAW_COUNT];
760 			}
761 
762 			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
763 			{
764 				int primitive = draw->primitive;
765 				int count = draw->count;
766 				int batch = draw->batchSize;
767 
768 				primitiveProgress[unit].drawCall = currentDraw;
769 				primitiveProgress[unit].firstPrimitive = primitive;
770 				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
771 
772 				draw->primitive += batch;
773 
774 				Task &task = taskQueue[qHead];
775 				task.type = Task::PRIMITIVES;
776 				task.primitiveUnit = unit;
777 
778 				primitiveProgress[unit].references = -1;
779 
780 				// Commit to the task queue
781 				qHead = (qHead + 1) % 32;
782 				qSize++;
783 			}
784 		}
785 	}
786 
scheduleTask(int threadIndex)787 	void Renderer::scheduleTask(int threadIndex)
788 	{
789 		schedulerMutex.lock();
790 
791 		if((int)qSize < threadCount - threadsAwake + 1)
792 		{
793 			findAvailableTasks();
794 		}
795 
796 		if(qSize != 0)
797 		{
798 			task[threadIndex] = taskQueue[(qHead - qSize) % 32];
799 			qSize--;
800 
801 			if(threadsAwake != threadCount)
802 			{
803 				int wakeup = qSize - threadsAwake + 1;
804 
805 				for(int i = 0; i < threadCount && wakeup > 0; i++)
806 				{
807 					if(task[i].type == Task::SUSPEND)
808 					{
809 						suspend[i]->wait();
810 						task[i].type = Task::RESUME;
811 						resume[i]->signal();
812 
813 						threadsAwake++;
814 						wakeup--;
815 					}
816 				}
817 			}
818 		}
819 		else
820 		{
821 			task[threadIndex].type = Task::SUSPEND;
822 
823 			threadsAwake--;
824 		}
825 
826 		schedulerMutex.unlock();
827 	}
828 
executeTask(int threadIndex)829 	void Renderer::executeTask(int threadIndex)
830 	{
831 		#if PERF_HUD
832 			int64_t startTick = Timer::ticks();
833 		#endif
834 
835 		switch(task[threadIndex].type)
836 		{
837 		case Task::PRIMITIVES:
838 			{
839 				int unit = task[threadIndex].primitiveUnit;
840 
841 				int input = primitiveProgress[unit].firstPrimitive;
842 				int count = primitiveProgress[unit].primitiveCount;
843 				DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
844 				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
845 
846 				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
847 
848 				#if PERF_HUD
849 					int64_t time = Timer::ticks();
850 					vertexTime[threadIndex] += time - startTick;
851 					startTick = time;
852 				#endif
853 
854 				int visible = 0;
855 
856 				if(!draw->setupState.rasterizerDiscard)
857 				{
858 					visible = (this->*setupPrimitives)(unit, count);
859 				}
860 
861 				primitiveProgress[unit].visible = visible;
862 				primitiveProgress[unit].references = clusterCount;
863 
864 				#if PERF_HUD
865 					setupTime[threadIndex] += Timer::ticks() - startTick;
866 				#endif
867 			}
868 			break;
869 		case Task::PIXELS:
870 			{
871 				int unit = task[threadIndex].primitiveUnit;
872 				int visible = primitiveProgress[unit].visible;
873 
874 				if(visible > 0)
875 				{
876 					int cluster = task[threadIndex].pixelCluster;
877 					Primitive *primitive = primitiveBatch[unit];
878 					DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
879 					DrawData *data = draw->data;
880 					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
881 
882 					pixelRoutine(primitive, visible, cluster, data);
883 				}
884 
885 				finishRendering(task[threadIndex]);
886 
887 				#if PERF_HUD
888 					pixelTime[threadIndex] += Timer::ticks() - startTick;
889 				#endif
890 			}
891 			break;
892 		case Task::RESUME:
893 			break;
894 		case Task::SUSPEND:
895 			break;
896 		default:
897 			ASSERT(false);
898 		}
899 	}
900 
synchronize()901 	void Renderer::synchronize()
902 	{
903 		sync->lock(sw::PUBLIC);
904 		sync->unlock();
905 	}
906 
finishRendering(Task & pixelTask)907 	void Renderer::finishRendering(Task &pixelTask)
908 	{
909 		int unit = pixelTask.primitiveUnit;
910 		int cluster = pixelTask.pixelCluster;
911 
912 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
913 		DrawData &data = *draw.data;
914 		int primitive = primitiveProgress[unit].firstPrimitive;
915 		int count = primitiveProgress[unit].primitiveCount;
916 		int processedPrimitives = primitive + count;
917 
918 		pixelProgress[cluster].processedPrimitives = processedPrimitives;
919 
920 		if(pixelProgress[cluster].processedPrimitives >= draw.count)
921 		{
922 			pixelProgress[cluster].drawCall++;
923 			pixelProgress[cluster].processedPrimitives = 0;
924 		}
925 
926 		int ref = atomicDecrement(&primitiveProgress[unit].references);
927 
928 		if(ref == 0)
929 		{
930 			ref = atomicDecrement(&draw.references);
931 
932 			if(ref == 0)
933 			{
934 				#if PERF_PROFILE
935 					for(int cluster = 0; cluster < clusterCount; cluster++)
936 					{
937 						for(int i = 0; i < PERF_TIMERS; i++)
938 						{
939 							profiler.cycles[i] += data.cycles[i][cluster];
940 						}
941 					}
942 				#endif
943 
944 				if(draw.queries)
945 				{
946 					for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
947 					{
948 						Query *query = *q;
949 
950 						switch(query->type)
951 						{
952 						case Query::FRAGMENTS_PASSED:
953 							for(int cluster = 0; cluster < clusterCount; cluster++)
954 							{
955 								atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
956 							}
957 							break;
958 						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
959 							atomicAdd((volatile int*)&query->data, processedPrimitives);
960 							break;
961 						default:
962 							break;
963 						}
964 
965 						atomicDecrement(&query->reference);
966 					}
967 
968 					delete draw.queries;
969 					draw.queries = 0;
970 				}
971 
972 				for(int i = 0; i < RENDERTARGETS; i++)
973 				{
974 					if(draw.renderTarget[i])
975 					{
976 						draw.renderTarget[i]->unlockInternal();
977 					}
978 				}
979 
980 				if(draw.depthBuffer)
981 				{
982 					draw.depthBuffer->unlockInternal();
983 				}
984 
985 				if(draw.stencilBuffer)
986 				{
987 					draw.stencilBuffer->unlockStencil();
988 				}
989 
990 				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
991 				{
992 					if(draw.texture[i])
993 					{
994 						draw.texture[i]->unlock();
995 					}
996 				}
997 
998 				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
999 				{
1000 					if(draw.vertexStream[i])
1001 					{
1002 						draw.vertexStream[i]->unlock();
1003 					}
1004 				}
1005 
1006 				if(draw.indexBuffer)
1007 				{
1008 					draw.indexBuffer->unlock();
1009 				}
1010 
1011 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1012 				{
1013 					if(draw.pUniformBuffers[i])
1014 					{
1015 						draw.pUniformBuffers[i]->unlock();
1016 					}
1017 					if(draw.vUniformBuffers[i])
1018 					{
1019 						draw.vUniformBuffers[i]->unlock();
1020 					}
1021 				}
1022 
1023 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1024 				{
1025 					if(draw.transformFeedbackBuffers[i])
1026 					{
1027 						draw.transformFeedbackBuffers[i]->unlock();
1028 					}
1029 				}
1030 
1031 				draw.vertexRoutine->unbind();
1032 				draw.setupRoutine->unbind();
1033 				draw.pixelRoutine->unbind();
1034 
1035 				sync->unlock();
1036 
1037 				draw.references = -1;
1038 				resumeApp->signal();
1039 			}
1040 		}
1041 
1042 		pixelProgress[cluster].executing = false;
1043 	}
1044 
processPrimitiveVertices(int unit,unsigned int start,unsigned int triangleCount,unsigned int loop,int thread)1045 	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1046 	{
1047 		Triangle *triangle = triangleBatch[unit];
1048 		DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1049 		DrawData *data = draw->data;
1050 		VertexTask *task = vertexTask[thread];
1051 
1052 		const void *indices = data->indices;
1053 		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1054 
1055 		if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1056 		{
1057 			task->vertexCache.clear();
1058 			task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1059 		}
1060 
1061 		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1062 
1063 		switch(draw->drawType)
1064 		{
1065 		case DRAW_POINTLIST:
1066 			{
1067 				unsigned int index = start;
1068 
1069 				for(unsigned int i = 0; i < triangleCount; i++)
1070 				{
1071 					batch[i][0] = index;
1072 					batch[i][1] = index;
1073 					batch[i][2] = index;
1074 
1075 					index += 1;
1076 				}
1077 			}
1078 			break;
1079 		case DRAW_LINELIST:
1080 			{
1081 				unsigned int index = 2 * start;
1082 
1083 				for(unsigned int i = 0; i < triangleCount; i++)
1084 				{
1085 					batch[i][0] = index + 0;
1086 					batch[i][1] = index + 1;
1087 					batch[i][2] = index + 1;
1088 
1089 					index += 2;
1090 				}
1091 			}
1092 			break;
1093 		case DRAW_LINESTRIP:
1094 			{
1095 				unsigned int index = start;
1096 
1097 				for(unsigned int i = 0; i < triangleCount; i++)
1098 				{
1099 					batch[i][0] = index + 0;
1100 					batch[i][1] = index + 1;
1101 					batch[i][2] = index + 1;
1102 
1103 					index += 1;
1104 				}
1105 			}
1106 			break;
1107 		case DRAW_LINELOOP:
1108 			{
1109 				unsigned int index = start;
1110 
1111 				for(unsigned int i = 0; i < triangleCount; i++)
1112 				{
1113 					batch[i][0] = (index + 0) % loop;
1114 					batch[i][1] = (index + 1) % loop;
1115 					batch[i][2] = (index + 1) % loop;
1116 
1117 					index += 1;
1118 				}
1119 			}
1120 			break;
1121 		case DRAW_TRIANGLELIST:
1122 			{
1123 				unsigned int index = 3 * start;
1124 
1125 				for(unsigned int i = 0; i < triangleCount; i++)
1126 				{
1127 					batch[i][0] = index + 0;
1128 					batch[i][1] = index + 1;
1129 					batch[i][2] = index + 2;
1130 
1131 					index += 3;
1132 				}
1133 			}
1134 			break;
1135 		case DRAW_TRIANGLESTRIP:
1136 			{
1137 				unsigned int index = start;
1138 
1139 				for(unsigned int i = 0; i < triangleCount; i++)
1140 				{
1141 					batch[i][0] = index + 0;
1142 					batch[i][1] = index + (index & 1) + 1;
1143 					batch[i][2] = index + (~index & 1) + 1;
1144 
1145 					index += 1;
1146 				}
1147 			}
1148 			break;
1149 		case DRAW_TRIANGLEFAN:
1150 			{
1151 				unsigned int index = start;
1152 
1153 				for(unsigned int i = 0; i < triangleCount; i++)
1154 				{
1155 					batch[i][0] = index + 1;
1156 					batch[i][1] = index + 2;
1157 					batch[i][2] = 0;
1158 
1159 					index += 1;
1160 				}
1161 			}
1162 			break;
1163 		case DRAW_INDEXEDPOINTLIST8:
1164 			{
1165 				const unsigned char *index = (const unsigned char*)indices + start;
1166 
1167 				for(unsigned int i = 0; i < triangleCount; i++)
1168 				{
1169 					batch[i][0] = *index;
1170 					batch[i][1] = *index;
1171 					batch[i][2] = *index;
1172 
1173 					index += 1;
1174 				}
1175 			}
1176 			break;
1177 		case DRAW_INDEXEDPOINTLIST16:
1178 			{
1179 				const unsigned short *index = (const unsigned short*)indices + start;
1180 
1181 				for(unsigned int i = 0; i < triangleCount; i++)
1182 				{
1183 					batch[i][0] = *index;
1184 					batch[i][1] = *index;
1185 					batch[i][2] = *index;
1186 
1187 					index += 1;
1188 				}
1189 			}
1190 			break;
1191 		case DRAW_INDEXEDPOINTLIST32:
1192 			{
1193 				const unsigned int *index = (const unsigned int*)indices + start;
1194 
1195 				for(unsigned int i = 0; i < triangleCount; i++)
1196 				{
1197 					batch[i][0] = *index;
1198 					batch[i][1] = *index;
1199 					batch[i][2] = *index;
1200 
1201 					index += 1;
1202 				}
1203 			}
1204 			break;
1205 		case DRAW_INDEXEDLINELIST8:
1206 			{
1207 				const unsigned char *index = (const unsigned char*)indices + 2 * start;
1208 
1209 				for(unsigned int i = 0; i < triangleCount; i++)
1210 				{
1211 					batch[i][0] = index[0];
1212 					batch[i][1] = index[1];
1213 					batch[i][2] = index[1];
1214 
1215 					index += 2;
1216 				}
1217 			}
1218 			break;
1219 		case DRAW_INDEXEDLINELIST16:
1220 			{
1221 				const unsigned short *index = (const unsigned short*)indices + 2 * start;
1222 
1223 				for(unsigned int i = 0; i < triangleCount; i++)
1224 				{
1225 					batch[i][0] = index[0];
1226 					batch[i][1] = index[1];
1227 					batch[i][2] = index[1];
1228 
1229 					index += 2;
1230 				}
1231 			}
1232 			break;
1233 		case DRAW_INDEXEDLINELIST32:
1234 			{
1235 				const unsigned int *index = (const unsigned int*)indices + 2 * start;
1236 
1237 				for(unsigned int i = 0; i < triangleCount; i++)
1238 				{
1239 					batch[i][0] = index[0];
1240 					batch[i][1] = index[1];
1241 					batch[i][2] = index[1];
1242 
1243 					index += 2;
1244 				}
1245 			}
1246 			break;
1247 		case DRAW_INDEXEDLINESTRIP8:
1248 			{
1249 				const unsigned char *index = (const unsigned char*)indices + start;
1250 
1251 				for(unsigned int i = 0; i < triangleCount; i++)
1252 				{
1253 					batch[i][0] = index[0];
1254 					batch[i][1] = index[1];
1255 					batch[i][2] = index[1];
1256 
1257 					index += 1;
1258 				}
1259 			}
1260 			break;
1261 		case DRAW_INDEXEDLINESTRIP16:
1262 			{
1263 				const unsigned short *index = (const unsigned short*)indices + start;
1264 
1265 				for(unsigned int i = 0; i < triangleCount; i++)
1266 				{
1267 					batch[i][0] = index[0];
1268 					batch[i][1] = index[1];
1269 					batch[i][2] = index[1];
1270 
1271 					index += 1;
1272 				}
1273 			}
1274 			break;
1275 		case DRAW_INDEXEDLINESTRIP32:
1276 			{
1277 				const unsigned int *index = (const unsigned int*)indices + start;
1278 
1279 				for(unsigned int i = 0; i < triangleCount; i++)
1280 				{
1281 					batch[i][0] = index[0];
1282 					batch[i][1] = index[1];
1283 					batch[i][2] = index[1];
1284 
1285 					index += 1;
1286 				}
1287 			}
1288 			break;
1289 		case DRAW_INDEXEDLINELOOP8:
1290 			{
1291 				const unsigned char *index = (const unsigned char*)indices;
1292 
1293 				for(unsigned int i = 0; i < triangleCount; i++)
1294 				{
1295 					batch[i][0] = index[(start + i + 0) % loop];
1296 					batch[i][1] = index[(start + i + 1) % loop];
1297 					batch[i][2] = index[(start + i + 1) % loop];
1298 				}
1299 			}
1300 			break;
1301 		case DRAW_INDEXEDLINELOOP16:
1302 			{
1303 				const unsigned short *index = (const unsigned short*)indices;
1304 
1305 				for(unsigned int i = 0; i < triangleCount; i++)
1306 				{
1307 					batch[i][0] = index[(start + i + 0) % loop];
1308 					batch[i][1] = index[(start + i + 1) % loop];
1309 					batch[i][2] = index[(start + i + 1) % loop];
1310 				}
1311 			}
1312 			break;
1313 		case DRAW_INDEXEDLINELOOP32:
1314 			{
1315 				const unsigned int *index = (const unsigned int*)indices;
1316 
1317 				for(unsigned int i = 0; i < triangleCount; i++)
1318 				{
1319 					batch[i][0] = index[(start + i + 0) % loop];
1320 					batch[i][1] = index[(start + i + 1) % loop];
1321 					batch[i][2] = index[(start + i + 1) % loop];
1322 				}
1323 			}
1324 			break;
1325 		case DRAW_INDEXEDTRIANGLELIST8:
1326 			{
1327 				const unsigned char *index = (const unsigned char*)indices + 3 * start;
1328 
1329 				for(unsigned int i = 0; i < triangleCount; i++)
1330 				{
1331 					batch[i][0] = index[0];
1332 					batch[i][1] = index[1];
1333 					batch[i][2] = index[2];
1334 
1335 					index += 3;
1336 				}
1337 			}
1338 			break;
1339 		case DRAW_INDEXEDTRIANGLELIST16:
1340 			{
1341 				const unsigned short *index = (const unsigned short*)indices + 3 * start;
1342 
1343 				for(unsigned int i = 0; i < triangleCount; i++)
1344 				{
1345 					batch[i][0] = index[0];
1346 					batch[i][1] = index[1];
1347 					batch[i][2] = index[2];
1348 
1349 					index += 3;
1350 				}
1351 			}
1352 			break;
1353 		case DRAW_INDEXEDTRIANGLELIST32:
1354 			{
1355 				const unsigned int *index = (const unsigned int*)indices + 3 * start;
1356 
1357 				for(unsigned int i = 0; i < triangleCount; i++)
1358 				{
1359 					batch[i][0] = index[0];
1360 					batch[i][1] = index[1];
1361 					batch[i][2] = index[2];
1362 
1363 					index += 3;
1364 				}
1365 			}
1366 			break;
1367 		case DRAW_INDEXEDTRIANGLESTRIP8:
1368 			{
1369 				const unsigned char *index = (const unsigned char*)indices + start;
1370 
1371 				for(unsigned int i = 0; i < triangleCount; i++)
1372 				{
1373 					batch[i][0] = index[0];
1374 					batch[i][1] = index[((start + i) & 1) + 1];
1375 					batch[i][2] = index[(~(start + i) & 1) + 1];
1376 
1377 					index += 1;
1378 				}
1379 			}
1380 			break;
1381 		case DRAW_INDEXEDTRIANGLESTRIP16:
1382 			{
1383 				const unsigned short *index = (const unsigned short*)indices + start;
1384 
1385 				for(unsigned int i = 0; i < triangleCount; i++)
1386 				{
1387 					batch[i][0] = index[0];
1388 					batch[i][1] = index[((start + i) & 1) + 1];
1389 					batch[i][2] = index[(~(start + i) & 1) + 1];
1390 
1391 					index += 1;
1392 				}
1393 			}
1394 			break;
1395 		case DRAW_INDEXEDTRIANGLESTRIP32:
1396 			{
1397 				const unsigned int *index = (const unsigned int*)indices + start;
1398 
1399 				for(unsigned int i = 0; i < triangleCount; i++)
1400 				{
1401 					batch[i][0] = index[0];
1402 					batch[i][1] = index[((start + i) & 1) + 1];
1403 					batch[i][2] = index[(~(start + i) & 1) + 1];
1404 
1405 					index += 1;
1406 				}
1407 			}
1408 			break;
1409 		case DRAW_INDEXEDTRIANGLEFAN8:
1410 			{
1411 				const unsigned char *index = (const unsigned char*)indices;
1412 
1413 				for(unsigned int i = 0; i < triangleCount; i++)
1414 				{
1415 					batch[i][0] = index[start + i + 1];
1416 					batch[i][1] = index[start + i + 2];
1417 					batch[i][2] = index[0];
1418 				}
1419 			}
1420 			break;
1421 		case DRAW_INDEXEDTRIANGLEFAN16:
1422 			{
1423 				const unsigned short *index = (const unsigned short*)indices;
1424 
1425 				for(unsigned int i = 0; i < triangleCount; i++)
1426 				{
1427 					batch[i][0] = index[start + i + 1];
1428 					batch[i][1] = index[start + i + 2];
1429 					batch[i][2] = index[0];
1430 				}
1431 			}
1432 			break;
1433 		case DRAW_INDEXEDTRIANGLEFAN32:
1434 			{
1435 				const unsigned int *index = (const unsigned int*)indices;
1436 
1437 				for(unsigned int i = 0; i < triangleCount; i++)
1438 				{
1439 					batch[i][0] = index[start + i + 1];
1440 					batch[i][1] = index[start + i + 2];
1441 					batch[i][2] = index[0];
1442 				}
1443 			}
1444 			break;
1445 		case DRAW_QUADLIST:
1446 			{
1447 				unsigned int index = 4 * start / 2;
1448 
1449 				for(unsigned int i = 0; i < triangleCount; i += 2)
1450 				{
1451 					batch[i+0][0] = index + 0;
1452 					batch[i+0][1] = index + 1;
1453 					batch[i+0][2] = index + 2;
1454 
1455 					batch[i+1][0] = index + 0;
1456 					batch[i+1][1] = index + 2;
1457 					batch[i+1][2] = index + 3;
1458 
1459 					index += 4;
1460 				}
1461 			}
1462 			break;
1463 		default:
1464 			ASSERT(false);
1465 			return;
1466 		}
1467 
1468 		task->primitiveStart = start;
1469 		task->vertexCount = triangleCount * 3;
1470 		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1471 	}
1472 
setupSolidTriangles(int unit,int count)1473 	int Renderer::setupSolidTriangles(int unit, int count)
1474 	{
1475 		Triangle *triangle = triangleBatch[unit];
1476 		Primitive *primitive = primitiveBatch[unit];
1477 
1478 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1479 		SetupProcessor::State &state = draw.setupState;
1480 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1481 
1482 		int ms = state.multiSample;
1483 		int pos = state.positionRegister;
1484 		const DrawData *data = draw.data;
1485 		int visible = 0;
1486 
1487 		for(int i = 0; i < count; i++, triangle++)
1488 		{
1489 			Vertex &v0 = triangle->v0;
1490 			Vertex &v1 = triangle->v1;
1491 			Vertex &v2 = triangle->v2;
1492 
1493 			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1494 			{
1495 				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1496 
1497 				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1498 
1499 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1500 				{
1501 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1502 					{
1503 						continue;
1504 					}
1505 				}
1506 
1507 				if(setupRoutine(primitive, triangle, &polygon, data))
1508 				{
1509 					primitive += ms;
1510 					visible++;
1511 				}
1512 			}
1513 		}
1514 
1515 		return visible;
1516 	}
1517 
setupWireframeTriangle(int unit,int count)1518 	int Renderer::setupWireframeTriangle(int unit, int count)
1519 	{
1520 		Triangle *triangle = triangleBatch[unit];
1521 		Primitive *primitive = primitiveBatch[unit];
1522 		int visible = 0;
1523 
1524 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1525 		SetupProcessor::State &state = draw.setupState;
1526 		SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1527 
1528 		const Vertex &v0 = triangle[0].v0;
1529 		const Vertex &v1 = triangle[0].v1;
1530 		const Vertex &v2 = triangle[0].v2;
1531 
1532 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1533 
1534 		if(state.cullMode == CULL_CLOCKWISE)
1535 		{
1536 			if(d >= 0) return 0;
1537 		}
1538 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1539 		{
1540 			if(d <= 0) return 0;
1541 		}
1542 
1543 		// Copy attributes
1544 		triangle[1].v0 = v1;
1545 		triangle[1].v1 = v2;
1546 		triangle[2].v0 = v2;
1547 		triangle[2].v1 = v0;
1548 
1549 		if(state.color[0][0].flat)   // FIXME
1550 		{
1551 			for(int i = 0; i < 2; i++)
1552 			{
1553 				triangle[1].v0.C[i] = triangle[0].v0.C[i];
1554 				triangle[1].v1.C[i] = triangle[0].v0.C[i];
1555 				triangle[2].v0.C[i] = triangle[0].v0.C[i];
1556 				triangle[2].v1.C[i] = triangle[0].v0.C[i];
1557 			}
1558 		}
1559 
1560 		for(int i = 0; i < 3; i++)
1561 		{
1562 			if(setupLine(*primitive, *triangle, draw))
1563 			{
1564 				primitive->area = 0.5f * d;
1565 
1566 				primitive++;
1567 				visible++;
1568 			}
1569 
1570 			triangle++;
1571 		}
1572 
1573 		return visible;
1574 	}
1575 
setupVertexTriangle(int unit,int count)1576 	int Renderer::setupVertexTriangle(int unit, int count)
1577 	{
1578 		Triangle *triangle = triangleBatch[unit];
1579 		Primitive *primitive = primitiveBatch[unit];
1580 		int visible = 0;
1581 
1582 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1583 		SetupProcessor::State &state = draw.setupState;
1584 
1585 		const Vertex &v0 = triangle[0].v0;
1586 		const Vertex &v1 = triangle[0].v1;
1587 		const Vertex &v2 = triangle[0].v2;
1588 
1589 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1590 
1591 		if(state.cullMode == CULL_CLOCKWISE)
1592 		{
1593 			if(d >= 0) return 0;
1594 		}
1595 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1596 		{
1597 			if(d <= 0) return 0;
1598 		}
1599 
1600 		// Copy attributes
1601 		triangle[1].v0 = v1;
1602 		triangle[2].v0 = v2;
1603 
1604 		for(int i = 0; i < 3; i++)
1605 		{
1606 			if(setupPoint(*primitive, *triangle, draw))
1607 			{
1608 				primitive->area = 0.5f * d;
1609 
1610 				primitive++;
1611 				visible++;
1612 			}
1613 
1614 			triangle++;
1615 		}
1616 
1617 		return visible;
1618 	}
1619 
setupLines(int unit,int count)1620 	int Renderer::setupLines(int unit, int count)
1621 	{
1622 		Triangle *triangle = triangleBatch[unit];
1623 		Primitive *primitive = primitiveBatch[unit];
1624 		int visible = 0;
1625 
1626 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1627 		SetupProcessor::State &state = draw.setupState;
1628 
1629 		int ms = state.multiSample;
1630 
1631 		for(int i = 0; i < count; i++)
1632 		{
1633 			if(setupLine(*primitive, *triangle, draw))
1634 			{
1635 				primitive += ms;
1636 				visible++;
1637 			}
1638 
1639 			triangle++;
1640 		}
1641 
1642 		return visible;
1643 	}
1644 
setupPoints(int unit,int count)1645 	int Renderer::setupPoints(int unit, int count)
1646 	{
1647 		Triangle *triangle = triangleBatch[unit];
1648 		Primitive *primitive = primitiveBatch[unit];
1649 		int visible = 0;
1650 
1651 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1652 		SetupProcessor::State &state = draw.setupState;
1653 
1654 		int ms = state.multiSample;
1655 
1656 		for(int i = 0; i < count; i++)
1657 		{
1658 			if(setupPoint(*primitive, *triangle, draw))
1659 			{
1660 				primitive += ms;
1661 				visible++;
1662 			}
1663 
1664 			triangle++;
1665 		}
1666 
1667 		return visible;
1668 	}
1669 
setupLine(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1670 	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1671 	{
1672 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1673 		const SetupProcessor::State &state = draw.setupState;
1674 		const DrawData &data = *draw.data;
1675 
1676 		float lineWidth = data.lineWidth;
1677 
1678 		Vertex &v0 = triangle.v0;
1679 		Vertex &v1 = triangle.v1;
1680 
1681 		int pos = state.positionRegister;
1682 
1683 		const float4 &P0 = v0.v[pos];
1684 		const float4 &P1 = v1.v[pos];
1685 
1686 		if(P0.w <= 0 && P1.w <= 0)
1687 		{
1688 			return false;
1689 		}
1690 
1691 		const float W = data.Wx16[0] * (1.0f / 16.0f);
1692 		const float H = data.Hx16[0] * (1.0f / 16.0f);
1693 
1694 		float dx = W * (P1.x / P1.w - P0.x / P0.w);
1695 		float dy = H * (P1.y / P1.w - P0.y / P0.w);
1696 
1697 		if(dx == 0 && dy == 0)
1698 		{
1699 			return false;
1700 		}
1701 
1702 		if(false)   // Rectangle
1703 		{
1704 			float4 P[4];
1705 			int C[4];
1706 
1707 			P[0] = P0;
1708 			P[1] = P1;
1709 			P[2] = P1;
1710 			P[3] = P0;
1711 
1712 			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1713 
1714 			dx *= scale;
1715 			dy *= scale;
1716 
1717 			float dx0w = dx * P0.w / W;
1718 			float dy0h = dy * P0.w / H;
1719 			float dx0h = dx * P0.w / H;
1720 			float dy0w = dy * P0.w / W;
1721 
1722 			float dx1w = dx * P1.w / W;
1723 			float dy1h = dy * P1.w / H;
1724 			float dx1h = dx * P1.w / H;
1725 			float dy1w = dy * P1.w / W;
1726 
1727 			P[0].x += -dy0w + -dx0w;
1728 			P[0].y += -dx0h + +dy0h;
1729 			C[0] = clipper->computeClipFlags(P[0]);
1730 
1731 			P[1].x += -dy1w + +dx1w;
1732 			P[1].y += -dx1h + +dy1h;
1733 			C[1] = clipper->computeClipFlags(P[1]);
1734 
1735 			P[2].x += +dy1w + +dx1w;
1736 			P[2].y += +dx1h + -dy1h;
1737 			C[2] = clipper->computeClipFlags(P[2]);
1738 
1739 			P[3].x += +dy0w + -dx0w;
1740 			P[3].y += +dx0h + +dy0h;
1741 			C[3] = clipper->computeClipFlags(P[3]);
1742 
1743 			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1744 			{
1745 				Polygon polygon(P, 4);
1746 
1747 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1748 
1749 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1750 				{
1751 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1752 					{
1753 						return false;
1754 					}
1755 				}
1756 
1757 				return setupRoutine(&primitive, &triangle, &polygon, &data);
1758 			}
1759 		}
1760 		else   // Diamond test convention
1761 		{
1762 			float4 P[8];
1763 			int C[8];
1764 
1765 			P[0] = P0;
1766 			P[1] = P0;
1767 			P[2] = P0;
1768 			P[3] = P0;
1769 			P[4] = P1;
1770 			P[5] = P1;
1771 			P[6] = P1;
1772 			P[7] = P1;
1773 
1774 			float dx0 = lineWidth * 0.5f * P0.w / W;
1775 			float dy0 = lineWidth * 0.5f * P0.w / H;
1776 
1777 			float dx1 = lineWidth * 0.5f * P1.w / W;
1778 			float dy1 = lineWidth * 0.5f * P1.w / H;
1779 
1780 			P[0].x += -dx0;
1781 			C[0] = clipper->computeClipFlags(P[0]);
1782 
1783 			P[1].y += +dy0;
1784 			C[1] = clipper->computeClipFlags(P[1]);
1785 
1786 			P[2].x += +dx0;
1787 			C[2] = clipper->computeClipFlags(P[2]);
1788 
1789 			P[3].y += -dy0;
1790 			C[3] = clipper->computeClipFlags(P[3]);
1791 
1792 			P[4].x += -dx1;
1793 			C[4] = clipper->computeClipFlags(P[4]);
1794 
1795 			P[5].y += +dy1;
1796 			C[5] = clipper->computeClipFlags(P[5]);
1797 
1798 			P[6].x += +dx1;
1799 			C[6] = clipper->computeClipFlags(P[6]);
1800 
1801 			P[7].y += -dy1;
1802 			C[7] = clipper->computeClipFlags(P[7]);
1803 
1804 			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1805 			{
1806 				float4 L[6];
1807 
1808 				if(dx > -dy)
1809 				{
1810 					if(dx > dy)   // Right
1811 					{
1812 						L[0] = P[0];
1813 						L[1] = P[1];
1814 						L[2] = P[5];
1815 						L[3] = P[6];
1816 						L[4] = P[7];
1817 						L[5] = P[3];
1818 					}
1819 					else   // Down
1820 					{
1821 						L[0] = P[0];
1822 						L[1] = P[4];
1823 						L[2] = P[5];
1824 						L[3] = P[6];
1825 						L[4] = P[2];
1826 						L[5] = P[3];
1827 					}
1828 				}
1829 				else
1830 				{
1831 					if(dx > dy)   // Up
1832 					{
1833 						L[0] = P[0];
1834 						L[1] = P[1];
1835 						L[2] = P[2];
1836 						L[3] = P[6];
1837 						L[4] = P[7];
1838 						L[5] = P[4];
1839 					}
1840 					else   // Left
1841 					{
1842 						L[0] = P[1];
1843 						L[1] = P[2];
1844 						L[2] = P[3];
1845 						L[3] = P[7];
1846 						L[4] = P[4];
1847 						L[5] = P[5];
1848 					}
1849 				}
1850 
1851 				Polygon polygon(L, 6);
1852 
1853 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1854 
1855 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1856 				{
1857 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1858 					{
1859 						return false;
1860 					}
1861 				}
1862 
1863 				return setupRoutine(&primitive, &triangle, &polygon, &data);
1864 			}
1865 		}
1866 
1867 		return false;
1868 	}
1869 
setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1870 	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1871 	{
1872 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1873 		const SetupProcessor::State &state = draw.setupState;
1874 		const DrawData &data = *draw.data;
1875 
1876 		Vertex &v = triangle.v0;
1877 
1878 		float pSize;
1879 
1880 		int pts = state.pointSizeRegister;
1881 
1882 		if(state.pointSizeRegister != Unused)
1883 		{
1884 			pSize = v.v[pts].y;
1885 		}
1886 		else
1887 		{
1888 			pSize = data.point.pointSize[0];
1889 		}
1890 
1891 		pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1892 
1893 		float4 P[4];
1894 		int C[4];
1895 
1896 		int pos = state.positionRegister;
1897 
1898 		P[0] = v.v[pos];
1899 		P[1] = v.v[pos];
1900 		P[2] = v.v[pos];
1901 		P[3] = v.v[pos];
1902 
1903 		const float X = pSize * P[0].w * data.halfPixelX[0];
1904 		const float Y = pSize * P[0].w * data.halfPixelY[0];
1905 
1906 		P[0].x -= X;
1907 		P[0].y += Y;
1908 		C[0] = clipper->computeClipFlags(P[0]);
1909 
1910 		P[1].x += X;
1911 		P[1].y += Y;
1912 		C[1] = clipper->computeClipFlags(P[1]);
1913 
1914 		P[2].x += X;
1915 		P[2].y -= Y;
1916 		C[2] = clipper->computeClipFlags(P[2]);
1917 
1918 		P[3].x -= X;
1919 		P[3].y -= Y;
1920 		C[3] = clipper->computeClipFlags(P[3]);
1921 
1922 		triangle.v1 = triangle.v0;
1923 		triangle.v2 = triangle.v0;
1924 
1925 		triangle.v1.X += iround(16 * 0.5f * pSize);
1926 		triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1927 
1928 		Polygon polygon(P, 4);
1929 
1930 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1931 		{
1932 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1933 
1934 			if(clipFlagsOr != Clipper::CLIP_FINITE)
1935 			{
1936 				if(!clipper->clip(polygon, clipFlagsOr, draw))
1937 				{
1938 					return false;
1939 				}
1940 			}
1941 
1942 			return setupRoutine(&primitive, &triangle, &polygon, &data);
1943 		}
1944 
1945 		return false;
1946 	}
1947 
initializeThreads()1948 	void Renderer::initializeThreads()
1949 	{
1950 		unitCount = ceilPow2(threadCount);
1951 		clusterCount = ceilPow2(threadCount);
1952 
1953 		for(int i = 0; i < unitCount; i++)
1954 		{
1955 			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1956 			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1957 		}
1958 
1959 		for(int i = 0; i < threadCount; i++)
1960 		{
1961 			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1962 			vertexTask[i]->vertexCache.drawCall = -1;
1963 
1964 			task[i].type = Task::SUSPEND;
1965 
1966 			resume[i] = new Event();
1967 			suspend[i] = new Event();
1968 
1969 			Parameters parameters;
1970 			parameters.threadIndex = i;
1971 			parameters.renderer = this;
1972 
1973 			exitThreads = false;
1974 			worker[i] = new Thread(threadFunction, &parameters);
1975 
1976 			suspend[i]->wait();
1977 			suspend[i]->signal();
1978 		}
1979 	}
1980 
terminateThreads()1981 	void Renderer::terminateThreads()
1982 	{
1983 		while(threadsAwake != 0)
1984 		{
1985 			Thread::sleep(1);
1986 		}
1987 
1988 		for(int thread = 0; thread < threadCount; thread++)
1989 		{
1990 			if(worker[thread])
1991 			{
1992 				exitThreads = true;
1993 				resume[thread]->signal();
1994 				worker[thread]->join();
1995 
1996 				delete worker[thread];
1997 				worker[thread] = 0;
1998 				delete resume[thread];
1999 				resume[thread] = 0;
2000 				delete suspend[thread];
2001 				suspend[thread] = 0;
2002 			}
2003 
2004 			deallocate(vertexTask[thread]);
2005 			vertexTask[thread] = 0;
2006 		}
2007 
2008 		for(int i = 0; i < 16; i++)
2009 		{
2010 			deallocate(triangleBatch[i]);
2011 			triangleBatch[i] = 0;
2012 
2013 			deallocate(primitiveBatch[i]);
2014 			primitiveBatch[i] = 0;
2015 		}
2016 	}
2017 
loadConstants(const VertexShader * vertexShader)2018 	void Renderer::loadConstants(const VertexShader *vertexShader)
2019 	{
2020 		if(!vertexShader) return;
2021 
2022 		size_t count = vertexShader->getLength();
2023 
2024 		for(size_t i = 0; i < count; i++)
2025 		{
2026 			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2027 
2028 			if(instruction->opcode == Shader::OPCODE_DEF)
2029 			{
2030 				int index = instruction->dst.index;
2031 				float value[4];
2032 
2033 				value[0] = instruction->src[0].value[0];
2034 				value[1] = instruction->src[0].value[1];
2035 				value[2] = instruction->src[0].value[2];
2036 				value[3] = instruction->src[0].value[3];
2037 
2038 				setVertexShaderConstantF(index, value);
2039 			}
2040 			else if(instruction->opcode == Shader::OPCODE_DEFI)
2041 			{
2042 				int index = instruction->dst.index;
2043 				int integer[4];
2044 
2045 				integer[0] = instruction->src[0].integer[0];
2046 				integer[1] = instruction->src[0].integer[1];
2047 				integer[2] = instruction->src[0].integer[2];
2048 				integer[3] = instruction->src[0].integer[3];
2049 
2050 				setVertexShaderConstantI(index, integer);
2051 			}
2052 			else if(instruction->opcode == Shader::OPCODE_DEFB)
2053 			{
2054 				int index = instruction->dst.index;
2055 				int boolean = instruction->src[0].boolean[0];
2056 
2057 				setVertexShaderConstantB(index, &boolean);
2058 			}
2059 		}
2060 	}
2061 
loadConstants(const PixelShader * pixelShader)2062 	void Renderer::loadConstants(const PixelShader *pixelShader)
2063 	{
2064 		if(!pixelShader) return;
2065 
2066 		size_t count = pixelShader->getLength();
2067 
2068 		for(size_t i = 0; i < count; i++)
2069 		{
2070 			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2071 
2072 			if(instruction->opcode == Shader::OPCODE_DEF)
2073 			{
2074 				int index = instruction->dst.index;
2075 				float value[4];
2076 
2077 				value[0] = instruction->src[0].value[0];
2078 				value[1] = instruction->src[0].value[1];
2079 				value[2] = instruction->src[0].value[2];
2080 				value[3] = instruction->src[0].value[3];
2081 
2082 				setPixelShaderConstantF(index, value);
2083 			}
2084 			else if(instruction->opcode == Shader::OPCODE_DEFI)
2085 			{
2086 				int index = instruction->dst.index;
2087 				int integer[4];
2088 
2089 				integer[0] = instruction->src[0].integer[0];
2090 				integer[1] = instruction->src[0].integer[1];
2091 				integer[2] = instruction->src[0].integer[2];
2092 				integer[3] = instruction->src[0].integer[3];
2093 
2094 				setPixelShaderConstantI(index, integer);
2095 			}
2096 			else if(instruction->opcode == Shader::OPCODE_DEFB)
2097 			{
2098 				int index = instruction->dst.index;
2099 				int boolean = instruction->src[0].boolean[0];
2100 
2101 				setPixelShaderConstantB(index, &boolean);
2102 			}
2103 		}
2104 	}
2105 
setIndexBuffer(Resource * indexBuffer)2106 	void Renderer::setIndexBuffer(Resource *indexBuffer)
2107 	{
2108 		context->indexBuffer = indexBuffer;
2109 	}
2110 
setMultiSampleMask(unsigned int mask)2111 	void Renderer::setMultiSampleMask(unsigned int mask)
2112 	{
2113 		context->sampleMask = mask;
2114 	}
2115 
setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)2116 	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2117 	{
2118 		sw::transparencyAntialiasing = transparencyAntialiasing;
2119 	}
2120 
isReadWriteTexture(int sampler)2121 	bool Renderer::isReadWriteTexture(int sampler)
2122 	{
2123 		for(int index = 0; index < RENDERTARGETS; index++)
2124 		{
2125 			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2126 			{
2127 				return true;
2128 			}
2129 		}
2130 
2131 		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2132 		{
2133 			return true;
2134 		}
2135 
2136 		return false;
2137 	}
2138 
updateClipper()2139 	void Renderer::updateClipper()
2140 	{
2141 		if(updateClipPlanes)
2142 		{
2143 			if(VertexProcessor::isFixedFunction())   // User plane in world space
2144 			{
2145 				const Matrix &scissorWorld = getViewTransform();
2146 
2147 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2148 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2149 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2150 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2151 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2152 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2153 			}
2154 			else   // User plane in clip space
2155 			{
2156 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2157 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2158 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2159 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2160 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2161 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2162 			}
2163 
2164 			updateClipPlanes = false;
2165 		}
2166 	}
2167 
setTextureResource(unsigned int sampler,Resource * resource)2168 	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2169 	{
2170 		ASSERT(sampler < TOTAL_IMAGE_UNITS);
2171 
2172 		context->texture[sampler] = resource;
2173 	}
2174 
setTextureLevel(unsigned int sampler,unsigned int face,unsigned int level,Surface * surface,TextureType type)2175 	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2176 	{
2177 		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2178 
2179 		context->sampler[sampler].setTextureLevel(face, level, surface, type);
2180 	}
2181 
setTextureFilter(SamplerType type,int sampler,FilterType textureFilter)2182 	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2183 	{
2184 		if(type == SAMPLER_PIXEL)
2185 		{
2186 			PixelProcessor::setTextureFilter(sampler, textureFilter);
2187 		}
2188 		else
2189 		{
2190 			VertexProcessor::setTextureFilter(sampler, textureFilter);
2191 		}
2192 	}
2193 
setMipmapFilter(SamplerType type,int sampler,MipmapType mipmapFilter)2194 	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2195 	{
2196 		if(type == SAMPLER_PIXEL)
2197 		{
2198 			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2199 		}
2200 		else
2201 		{
2202 			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2203 		}
2204 	}
2205 
setGatherEnable(SamplerType type,int sampler,bool enable)2206 	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2207 	{
2208 		if(type == SAMPLER_PIXEL)
2209 		{
2210 			PixelProcessor::setGatherEnable(sampler, enable);
2211 		}
2212 		else
2213 		{
2214 			VertexProcessor::setGatherEnable(sampler, enable);
2215 		}
2216 	}
2217 
setAddressingModeU(SamplerType type,int sampler,AddressingMode addressMode)2218 	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2219 	{
2220 		if(type == SAMPLER_PIXEL)
2221 		{
2222 			PixelProcessor::setAddressingModeU(sampler, addressMode);
2223 		}
2224 		else
2225 		{
2226 			VertexProcessor::setAddressingModeU(sampler, addressMode);
2227 		}
2228 	}
2229 
setAddressingModeV(SamplerType type,int sampler,AddressingMode addressMode)2230 	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2231 	{
2232 		if(type == SAMPLER_PIXEL)
2233 		{
2234 			PixelProcessor::setAddressingModeV(sampler, addressMode);
2235 		}
2236 		else
2237 		{
2238 			VertexProcessor::setAddressingModeV(sampler, addressMode);
2239 		}
2240 	}
2241 
setAddressingModeW(SamplerType type,int sampler,AddressingMode addressMode)2242 	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2243 	{
2244 		if(type == SAMPLER_PIXEL)
2245 		{
2246 			PixelProcessor::setAddressingModeW(sampler, addressMode);
2247 		}
2248 		else
2249 		{
2250 			VertexProcessor::setAddressingModeW(sampler, addressMode);
2251 		}
2252 	}
2253 
setReadSRGB(SamplerType type,int sampler,bool sRGB)2254 	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2255 	{
2256 		if(type == SAMPLER_PIXEL)
2257 		{
2258 			PixelProcessor::setReadSRGB(sampler, sRGB);
2259 		}
2260 		else
2261 		{
2262 			VertexProcessor::setReadSRGB(sampler, sRGB);
2263 		}
2264 	}
2265 
setMipmapLOD(SamplerType type,int sampler,float bias)2266 	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2267 	{
2268 		if(type == SAMPLER_PIXEL)
2269 		{
2270 			PixelProcessor::setMipmapLOD(sampler, bias);
2271 		}
2272 		else
2273 		{
2274 			VertexProcessor::setMipmapLOD(sampler, bias);
2275 		}
2276 	}
2277 
setBorderColor(SamplerType type,int sampler,const Color<float> & borderColor)2278 	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2279 	{
2280 		if(type == SAMPLER_PIXEL)
2281 		{
2282 			PixelProcessor::setBorderColor(sampler, borderColor);
2283 		}
2284 		else
2285 		{
2286 			VertexProcessor::setBorderColor(sampler, borderColor);
2287 		}
2288 	}
2289 
setMaxAnisotropy(SamplerType type,int sampler,float maxAnisotropy)2290 	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2291 	{
2292 		if(type == SAMPLER_PIXEL)
2293 		{
2294 			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2295 		}
2296 		else
2297 		{
2298 			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2299 		}
2300 	}
2301 
setSwizzleR(SamplerType type,int sampler,SwizzleType swizzleR)2302 	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2303 	{
2304 		if(type == SAMPLER_PIXEL)
2305 		{
2306 			PixelProcessor::setSwizzleR(sampler, swizzleR);
2307 		}
2308 		else
2309 		{
2310 			VertexProcessor::setSwizzleR(sampler, swizzleR);
2311 		}
2312 	}
2313 
setSwizzleG(SamplerType type,int sampler,SwizzleType swizzleG)2314 	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2315 	{
2316 		if(type == SAMPLER_PIXEL)
2317 		{
2318 			PixelProcessor::setSwizzleG(sampler, swizzleG);
2319 		}
2320 		else
2321 		{
2322 			VertexProcessor::setSwizzleG(sampler, swizzleG);
2323 		}
2324 	}
2325 
setSwizzleB(SamplerType type,int sampler,SwizzleType swizzleB)2326 	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2327 	{
2328 		if(type == SAMPLER_PIXEL)
2329 		{
2330 			PixelProcessor::setSwizzleB(sampler, swizzleB);
2331 		}
2332 		else
2333 		{
2334 			VertexProcessor::setSwizzleB(sampler, swizzleB);
2335 		}
2336 	}
2337 
setSwizzleA(SamplerType type,int sampler,SwizzleType swizzleA)2338 	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2339 	{
2340 		if(type == SAMPLER_PIXEL)
2341 		{
2342 			PixelProcessor::setSwizzleA(sampler, swizzleA);
2343 		}
2344 		else
2345 		{
2346 			VertexProcessor::setSwizzleA(sampler, swizzleA);
2347 		}
2348 	}
2349 
	// Forwards the point sprite enable flag to the context.
	void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
	{
		context->setPointSpriteEnable(pointSpriteEnable);
	}
2354 
	// Forwards the point scale enable flag to the context.
	void Renderer::setPointScaleEnable(bool pointScaleEnable)
	{
		context->setPointScaleEnable(pointScaleEnable);
	}
2359 
	// Stores 'width' in context->lineWidth. The value is not validated here.
	void Renderer::setLineWidth(float width)
	{
		context->lineWidth = width;
	}
2364 
	// Records 'bias' in depthBias. The value is only stored here, not applied.
	void Renderer::setDepthBias(float bias)
	{
		depthBias = bias;
	}
2369 
	// Records 'slopeBias' in slopeDepthBias. The value is only stored here, not applied.
	void Renderer::setSlopeDepthBias(float slopeBias)
	{
		slopeDepthBias = slopeBias;
	}
2374 
	// Stores the rasterizer discard flag in context->rasterizerDiscard.
	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
	{
		context->rasterizerDiscard = rasterizerDiscard;
	}
2379 
	// Makes 'shader' the context's pixel shader, then passes it to loadConstants().
	// NOTE(review): 'shader' is not checked for null here; whether null is
	// acceptable depends on loadConstants() - confirm.
	void Renderer::setPixelShader(const PixelShader *shader)
	{
		context->pixelShader = shader;

		loadConstants(shader);
	}
2386 
	// Makes 'shader' the context's vertex shader, then passes it to loadConstants().
	// NOTE(review): 'shader' is not checked for null here; whether null is
	// acceptable depends on loadConstants() - confirm.
	void Renderer::setVertexShader(const VertexShader *shader)
	{
		context->vertexShader = shader;

		loadConstants(shader);
	}
2393 
setPixelShaderConstantF(int index,const float value[4],int count)2394 	void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2395 	{
2396 		for(int i = 0; i < DRAW_COUNT; i++)
2397 		{
2398 			if(drawCall[i]->psDirtyConstF < index + count)
2399 			{
2400 				drawCall[i]->psDirtyConstF = index + count;
2401 			}
2402 		}
2403 
2404 		for(int i = 0; i < count; i++)
2405 		{
2406 			PixelProcessor::setFloatConstant(index + i, value);
2407 			value += 4;
2408 		}
2409 	}
2410 
setPixelShaderConstantI(int index,const int value[4],int count)2411 	void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2412 	{
2413 		for(int i = 0; i < DRAW_COUNT; i++)
2414 		{
2415 			if(drawCall[i]->psDirtyConstI < index + count)
2416 			{
2417 				drawCall[i]->psDirtyConstI = index + count;
2418 			}
2419 		}
2420 
2421 		for(int i = 0; i < count; i++)
2422 		{
2423 			PixelProcessor::setIntegerConstant(index + i, value);
2424 			value += 4;
2425 		}
2426 	}
2427 
setPixelShaderConstantB(int index,const int * boolean,int count)2428 	void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2429 	{
2430 		for(int i = 0; i < DRAW_COUNT; i++)
2431 		{
2432 			if(drawCall[i]->psDirtyConstB < index + count)
2433 			{
2434 				drawCall[i]->psDirtyConstB = index + count;
2435 			}
2436 		}
2437 
2438 		for(int i = 0; i < count; i++)
2439 		{
2440 			PixelProcessor::setBooleanConstant(index + i, *boolean);
2441 			boolean++;
2442 		}
2443 	}
2444 
setVertexShaderConstantF(int index,const float value[4],int count)2445 	void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2446 	{
2447 		for(int i = 0; i < DRAW_COUNT; i++)
2448 		{
2449 			if(drawCall[i]->vsDirtyConstF < index + count)
2450 			{
2451 				drawCall[i]->vsDirtyConstF = index + count;
2452 			}
2453 		}
2454 
2455 		for(int i = 0; i < count; i++)
2456 		{
2457 			VertexProcessor::setFloatConstant(index + i, value);
2458 			value += 4;
2459 		}
2460 	}
2461 
setVertexShaderConstantI(int index,const int value[4],int count)2462 	void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2463 	{
2464 		for(int i = 0; i < DRAW_COUNT; i++)
2465 		{
2466 			if(drawCall[i]->vsDirtyConstI < index + count)
2467 			{
2468 				drawCall[i]->vsDirtyConstI = index + count;
2469 			}
2470 		}
2471 
2472 		for(int i = 0; i < count; i++)
2473 		{
2474 			VertexProcessor::setIntegerConstant(index + i, value);
2475 			value += 4;
2476 		}
2477 	}
2478 
setVertexShaderConstantB(int index,const int * boolean,int count)2479 	void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2480 	{
2481 		for(int i = 0; i < DRAW_COUNT; i++)
2482 		{
2483 			if(drawCall[i]->vsDirtyConstB < index + count)
2484 			{
2485 				drawCall[i]->vsDirtyConstB = index + count;
2486 			}
2487 		}
2488 
2489 		for(int i = 0; i < count; i++)
2490 		{
2491 			VertexProcessor::setBooleanConstant(index + i, *boolean);
2492 			boolean++;
2493 		}
2494 	}
2495 
	// Passes model matrix 'M' for index 'i' through to the VertexProcessor.
	// Unlike the view, base and projection setters, this does not set
	// updateClipPlanes.
	void Renderer::setModelMatrix(const Matrix &M, int i)
	{
		VertexProcessor::setModelMatrix(M, i);
	}
2500 
	// Passes view matrix 'V' to the VertexProcessor and sets updateClipPlanes
	// so that the clip planes get recomputed.
	void Renderer::setViewMatrix(const Matrix &V)
	{
		VertexProcessor::setViewMatrix(V);
		updateClipPlanes = true;
	}
2506 
	// Passes base matrix 'B' to the VertexProcessor and sets updateClipPlanes
	// so that the clip planes get recomputed.
	void Renderer::setBaseMatrix(const Matrix &B)
	{
		VertexProcessor::setBaseMatrix(B);
		updateClipPlanes = true;
	}
2512 
	// Passes projection matrix 'P' to the VertexProcessor and sets
	// updateClipPlanes so that the clip planes get recomputed.
	void Renderer::setProjectionMatrix(const Matrix &P)
	{
		VertexProcessor::setProjectionMatrix(P);
		updateClipPlanes = true;
	}
2518 
	// Appends 'query' to the end of the queries list.
	// No check is made for the query already being present.
	void Renderer::addQuery(Query *query)
	{
		queries.push_back(query);
	}
2523 
	// Removes 'query' from the queries list. The query object itself is not
	// deleted here.
	void Renderer::removeQuery(Query *query)
	{
		queries.remove(query);
	}
2528 
2529 	#if PERF_HUD
		// Returns the current value of threadCount.
		int Renderer::getThreadCount()
		{
			return threadCount;
		}
2534 
		// Returns the vertex time recorded for 'thread'.
		// 'thread' is used as an index without range checking.
		int64_t Renderer::getVertexTime(int thread)
		{
			return vertexTime[thread];
		}
2539 
		// Returns the setup time recorded for 'thread'.
		// 'thread' is used as an index without range checking.
		int64_t Renderer::getSetupTime(int thread)
		{
			return setupTime[thread];
		}
2544 
		// Returns the pixel time recorded for 'thread'.
		// 'thread' is used as an index without range checking.
		int64_t Renderer::getPixelTime(int thread)
		{
			return pixelTime[thread];
		}
2549 
resetTimers()2550 		void Renderer::resetTimers()
2551 		{
2552 			for(int thread = 0; thread < threadCount; thread++)
2553 			{
2554 				vertexTime[thread] = 0;
2555 				setupTime[thread] = 0;
2556 				pixelTime[thread] = 0;
2557 			}
2558 		}
2559 	#endif
2560 
	// Copies 'viewport' into the renderer's viewport member.
	void Renderer::setViewport(const Viewport &viewport)
	{
		this->viewport = viewport;   // this-> disambiguates from the parameter
	}
2565 
	// Copies 'scissor' into the renderer's scissor member.
	void Renderer::setScissor(const Rect &scissor)
	{
		this->scissor = scissor;   // this-> disambiguates from the parameter
	}
2570 
setClipFlags(int flags)2571 	void Renderer::setClipFlags(int flags)
2572 	{
2573 		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2574 	}
2575 
setClipPlane(unsigned int index,const float plane[4])2576 	void Renderer::setClipPlane(unsigned int index, const float plane[4])
2577 	{
2578 		if(index < MAX_CLIP_PLANES)
2579 		{
2580 			userPlane[index] = plane;
2581 		}
2582 		else ASSERT(false);
2583 
2584 		updateClipPlanes = true;
2585 	}
2586 
updateConfiguration(bool initialUpdate)2587 	void Renderer::updateConfiguration(bool initialUpdate)
2588 	{
2589 		bool newConfiguration = swiftConfig->hasNewConfiguration();
2590 
2591 		if(newConfiguration || initialUpdate)
2592 		{
2593 			terminateThreads();
2594 
2595 			SwiftConfig::Configuration configuration = {};
2596 			swiftConfig->getConfiguration(configuration);
2597 
2598 			precacheVertex = !newConfiguration && configuration.precache;
2599 			precacheSetup = !newConfiguration && configuration.precache;
2600 			precachePixel = !newConfiguration && configuration.precache;
2601 
2602 			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2603 			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2604 			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2605 
2606 			switch(configuration.textureSampleQuality)
2607 			{
2608 			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2609 			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2610 			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2611 			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2612 			}
2613 
2614 			switch(configuration.mipmapQuality)
2615 			{
2616 			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2617 			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2618 			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2619 			}
2620 
2621 			setPerspectiveCorrection(configuration.perspectiveCorrection);
2622 
2623 			switch(configuration.transcendentalPrecision)
2624 			{
2625 			case 0:
2626 				logPrecision = APPROXIMATE;
2627 				expPrecision = APPROXIMATE;
2628 				rcpPrecision = APPROXIMATE;
2629 				rsqPrecision = APPROXIMATE;
2630 				break;
2631 			case 1:
2632 				logPrecision = PARTIAL;
2633 				expPrecision = PARTIAL;
2634 				rcpPrecision = PARTIAL;
2635 				rsqPrecision = PARTIAL;
2636 				break;
2637 			case 2:
2638 				logPrecision = ACCURATE;
2639 				expPrecision = ACCURATE;
2640 				rcpPrecision = ACCURATE;
2641 				rsqPrecision = ACCURATE;
2642 				break;
2643 			case 3:
2644 				logPrecision = WHQL;
2645 				expPrecision = WHQL;
2646 				rcpPrecision = WHQL;
2647 				rsqPrecision = WHQL;
2648 				break;
2649 			case 4:
2650 				logPrecision = IEEE;
2651 				expPrecision = IEEE;
2652 				rcpPrecision = IEEE;
2653 				rsqPrecision = IEEE;
2654 				break;
2655 			default:
2656 				logPrecision = ACCURATE;
2657 				expPrecision = ACCURATE;
2658 				rcpPrecision = ACCURATE;
2659 				rsqPrecision = ACCURATE;
2660 				break;
2661 			}
2662 
2663 			switch(configuration.transparencyAntialiasing)
2664 			{
2665 			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2666 			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2667 			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2668 			}
2669 
2670 			switch(configuration.threadCount)
2671 			{
2672 			case -1: threadCount = CPUID::coreCount();        break;
2673 			case 0:  threadCount = CPUID::processAffinity();  break;
2674 			default: threadCount = configuration.threadCount; break;
2675 			}
2676 
2677 			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2678 			CPUID::setEnableSSSE3(configuration.enableSSSE3);
2679 			CPUID::setEnableSSE3(configuration.enableSSE3);
2680 			CPUID::setEnableSSE2(configuration.enableSSE2);
2681 			CPUID::setEnableSSE(configuration.enableSSE);
2682 
2683 			for(int pass = 0; pass < 10; pass++)
2684 			{
2685 				optimization[pass] = configuration.optimization[pass];
2686 			}
2687 
2688 			forceWindowed = configuration.forceWindowed;
2689 			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2690 			postBlendSRGB = configuration.postBlendSRGB;
2691 			exactColorRounding = configuration.exactColorRounding;
2692 			forceClearRegisters = configuration.forceClearRegisters;
2693 
2694 		#ifndef NDEBUG
2695 			minPrimitives = configuration.minPrimitives;
2696 			maxPrimitives = configuration.maxPrimitives;
2697 		#endif
2698 		}
2699 
2700 		if(!initialUpdate && !worker[0])
2701 		{
2702 			initializeThreads();
2703 		}
2704 	}
2705 }
2706