1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Reference Renderer
3  * -----------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Reference implementation for per-fragment operations.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "rrFragmentOperations.hpp"
25 #include "tcuVectorUtil.hpp"
26 #include "tcuTextureUtil.hpp"
27 
28 using tcu::IVec2;
29 using tcu::Vec3;
30 using tcu::Vec4;
31 using tcu::IVec4;
32 using tcu::UVec4;
33 using tcu::min;
34 using tcu::max;
35 using tcu::clamp;
36 using de::min;
37 using de::max;
38 using de::clamp;
39 
40 namespace rr
41 {
42 
43 // Return oldValue with the bits indicated by mask replaced by corresponding bits of newValue.
maskedBitReplace(int oldValue,int newValue,deUint32 mask)44 static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask)
45 {
46 	return (oldValue & ~mask) | (newValue & mask);
47 }
48 
isInsideRect(const IVec2 & point,const WindowRectangle & rect)49 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect)
50 {
51 	return de::inBounds(point.x(), rect.left,		rect.left + rect.width) &&
52 		   de::inBounds(point.y(), rect.bottom,		rect.bottom + rect.height);
53 }
54 
// Divide the x, y and z components of v by its w component, leaving w itself
// unchanged. When w is not greater than zero the result is the all-zero vector;
// in that case x, y and z are expected (asserted) to already be zero.
static inline Vec4 unpremultiply (const Vec4& v)
{
	const float alpha = v.w();

	if (!(alpha > 0.0f))
	{
		DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
		return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	return Vec4(v.x()/alpha, v.y()/alpha, v.z()/alpha, alpha);
}
65 
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const Vec4 & v,const WindowRectangle & r)66 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const Vec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);				}
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const IVec4 & v,const WindowRectangle & r)67 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const IVec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);				}
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const UVec4 & v,const WindowRectangle & r)68 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const UVec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>());	}
clearMultisampleDepthBuffer(const tcu::PixelBufferAccess & dst,float v,const WindowRectangle & r)69 void clearMultisampleDepthBuffer	(const tcu::PixelBufferAccess& dst, float v,		const WindowRectangle& r)	{ tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);			}
clearMultisampleStencilBuffer(const tcu::PixelBufferAccess & dst,int v,const WindowRectangle & r)70 void clearMultisampleStencilBuffer	(const tcu::PixelBufferAccess& dst, int v,			const WindowRectangle& r)	{ tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);			}
71 
FragmentProcessor(void)72 FragmentProcessor::FragmentProcessor (void)
73 	: m_sampleRegister()
74 {
75 }
76 
executeScissorTest(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const WindowRectangle & scissorRect)77 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)
78 {
79 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
80 	{
81 		if (m_sampleRegister[regSampleNdx].isAlive)
82 		{
83 			int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment;
84 
85 			if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
86 				m_sampleRegister[regSampleNdx].isAlive = false;
87 		}
88 	}
89 }
90 
executeStencilCompare(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::ConstPixelBufferAccess & stencilBuffer)91 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)
92 {
93 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION)																					\
94 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)															\
95 	{																																		\
96 		if (m_sampleRegister[regSampleNdx].isAlive)																							\
97 		{																																	\
98 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;													\
99 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];					\
100 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
101 			int					maskedRef			= stencilState.compMask & clampedStencilRef;											\
102 			int					maskedBuf			= stencilState.compMask & stencilBufferValue;											\
103 			DE_UNREF(maskedRef);																											\
104 			DE_UNREF(maskedBuf);																											\
105 																																			\
106 			m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION);															\
107 		}																																	\
108 	}
109 
110 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
111 
112 	switch (stencilState.func)
113 	{
114 		case TESTFUNC_NEVER:	SAMPLE_REGISTER_STENCIL_COMPARE(false)						break;
115 		case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_STENCIL_COMPARE(true)						break;
116 		case TESTFUNC_LESS:		SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <  maskedBuf)		break;
117 		case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf)		break;
118 		case TESTFUNC_GREATER:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >  maskedBuf)		break;
119 		case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf)		break;
120 		case TESTFUNC_EQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf)		break;
121 		case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf)		break;
122 		default:
123 			DE_ASSERT(false);
124 	}
125 
126 #undef SAMPLE_REGISTER_STENCIL_COMPARE
127 }
128 
executeStencilSFail(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::PixelBufferAccess & stencilBuffer)129 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
130 {
131 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION)																																		\
132 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																									\
133 	{																																												\
134 		if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed)																				\
135 		{																																											\
136 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;																							\
137 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];															\
138 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());											\
139 																																													\
140 			stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
141 			m_sampleRegister[regSampleNdx].isAlive = false;																															\
142 		}																																											\
143 	}
144 
145 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
146 
147 	switch (stencilState.sFail)
148 	{
149 		case STENCILOP_KEEP:		SAMPLE_REGISTER_SFAIL(stencilBufferValue)												break;
150 		case STENCILOP_ZERO:		SAMPLE_REGISTER_SFAIL(0)																break;
151 		case STENCILOP_REPLACE:		SAMPLE_REGISTER_SFAIL(clampedStencilRef)												break;
152 		case STENCILOP_INCR:		SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))		break;
153 		case STENCILOP_DECR:		SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))		break;
154 		case STENCILOP_INCR_WRAP:	SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1))				break;
155 		case STENCILOP_DECR_WRAP:	SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1))				break;
156 		case STENCILOP_INVERT:		SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1))				break;
157 		default:
158 			DE_ASSERT(false);
159 	}
160 
161 #undef SAMPLE_REGISTER_SFAIL
162 }
163 
executeDepthCompare(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,TestFunc depthFunc,const tcu::ConstPixelBufferAccess & depthBuffer)164 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)
165 {
166 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION)																						\
167 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																\
168 	{																																			\
169 		if (m_sampleRegister[regSampleNdx].isAlive)																								\
170 		{																																		\
171 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;														\
172 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];						\
173 			float				depthBufferValue	= depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());			\
174 			float				sampleDepthFloat	= frag.sampleDepths[fragSampleNdx];															\
175 			float				sampleDepth			= de::clamp(sampleDepthFloat, 0.0f, 1.0f);													\
176 																																				\
177 			m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);																	\
178 																																				\
179 			DE_UNREF(depthBufferValue);																											\
180 			DE_UNREF(sampleDepth);																												\
181 		}																																		\
182 	}
183 
184 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION)																					\
185 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																\
186 	{																																			\
187 		if (m_sampleRegister[regSampleNdx].isAlive)																								\
188 		{																																		\
189 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;														\
190 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];						\
191 			deUint32			depthBufferValue	= depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();	\
192 			float				sampleDepthFloat	= frag.sampleDepths[fragSampleNdx];															\
193 																																				\
194 			/* Convert input float to target buffer format for comparison */																	\
195 																																				\
196 			deUint32 buffer[2];																													\
197 																																				\
198 			DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());														\
199 																																				\
200 			tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);															\
201 			access.setPixDepth(sampleDepthFloat, 0, 0, 0);																						\
202 			deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x();																			\
203 																																				\
204 			m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);																	\
205 																																				\
206 			DE_UNREF(depthBufferValue);																											\
207 			DE_UNREF(sampleDepth);																												\
208 		}																																		\
209 	}
210 
211 	if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
212 	{
213 
214 		switch (depthFunc)
215 		{
216 			case TESTFUNC_NEVER:	SAMPLE_REGISTER_DEPTH_COMPARE_F(false)							break;
217 			case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_DEPTH_COMPARE_F(true)								break;
218 			case TESTFUNC_LESS:		SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <  depthBufferValue)	break;
219 			case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue)	break;
220 			case TESTFUNC_GREATER:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >  depthBufferValue)	break;
221 			case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue)	break;
222 			case TESTFUNC_EQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue)	break;
223 			case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue)	break;
224 			default:
225 				DE_ASSERT(false);
226 		}
227 
228 	}
229 	else
230 	{
231 		switch (depthFunc)
232 		{
233 			case TESTFUNC_NEVER:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(false)							break;
234 			case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(true)								break;
235 			case TESTFUNC_LESS:		SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <  depthBufferValue)	break;
236 			case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue)	break;
237 			case TESTFUNC_GREATER:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >  depthBufferValue)	break;
238 			case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue)	break;
239 			case TESTFUNC_EQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue)	break;
240 			case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue)	break;
241 			default:
242 				DE_ASSERT(false);
243 		}
244 	}
245 
246 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
247 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
248 }
249 
executeDepthWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::PixelBufferAccess & depthBuffer)250 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)
251 {
252 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
253 	{
254 		if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
255 		{
256 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
257 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
258 			const float			clampedDepth	= de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
259 
260 			depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
261 		}
262 	}
263 }
264 
executeStencilDpFailAndPass(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::PixelBufferAccess & stencilBuffer)265 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
266 {
267 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION)																													\
268 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																								\
269 	{																																											\
270 		if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION))																												\
271 		{																																										\
272 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;																						\
273 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];														\
274 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());										\
275 																																												\
276 			stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
277 		}																																										\
278 	}
279 
280 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION)																											\
281 		switch (stencilState.OP_NAME)																														\
282 		{																																					\
283 			case STENCILOP_KEEP:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue)												break;	\
284 			case STENCILOP_ZERO:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0)																break;	\
285 			case STENCILOP_REPLACE:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef)												break;	\
286 			case STENCILOP_INCR:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))	break;	\
287 			case STENCILOP_DECR:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))	break;	\
288 			case STENCILOP_INCR_WRAP:	SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1))			break;	\
289 			case STENCILOP_DECR_WRAP:	SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1))			break;	\
290 			case STENCILOP_INVERT:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1))				break;	\
291 			default:																																		\
292 				DE_ASSERT(false);																															\
293 		}
294 
295 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
296 
297 	SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
298 	SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
299 
300 #undef SWITCH_DPFAIL_OR_DPPASS
301 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
302 }
303 
executeBlendFactorComputeRGB(const Vec4 & blendColor,const BlendState & blendRGBState)304 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState)
305 {
306 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)											\
307 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)								\
308 	{																											\
309 		if (m_sampleRegister[regSampleNdx].isAlive)																\
310 		{																										\
311 			const Vec4& src		= m_sampleRegister[regSampleNdx].clampedBlendSrcColor;							\
312 			const Vec4& src1	= m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;							\
313 			const Vec4& dst		= m_sampleRegister[regSampleNdx].clampedBlendDstColor;							\
314 			DE_UNREF(src);																						\
315 			DE_UNREF(src1);																						\
316 			DE_UNREF(dst);																						\
317 																												\
318 			m_sampleRegister[regSampleNdx].FACTOR_NAME = clamp((FACTOR_EXPRESSION), Vec3(0.0f), Vec3(1.0f));	\
319 		}																										\
320 	}
321 
322 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME)																					\
323 	switch (blendRGBState.FUNC_NAME)																											\
324 	{																																			\
325 		case BLENDFUNC_ZERO:						SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f))								break;	\
326 		case BLENDFUNC_ONE:							SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f))								break;	\
327 		case BLENDFUNC_SRC_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2))						break;	\
328 		case BLENDFUNC_ONE_MINUS_SRC_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2))			break;	\
329 		case BLENDFUNC_DST_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2))						break;	\
330 		case BLENDFUNC_ONE_MINUS_DST_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2))			break;	\
331 		case BLENDFUNC_SRC_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w()))							break;	\
332 		case BLENDFUNC_ONE_MINUS_SRC_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w()))						break;	\
333 		case BLENDFUNC_DST_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w()))							break;	\
334 		case BLENDFUNC_ONE_MINUS_DST_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w()))						break;	\
335 		case BLENDFUNC_CONSTANT_COLOR:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2))				break;	\
336 		case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2))	break;	\
337 		case BLENDFUNC_CONSTANT_ALPHA:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w()))						break;	\
338 		case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w()))				break;	\
339 		case BLENDFUNC_SRC_ALPHA_SATURATE:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w())))	break;	\
340 		case BLENDFUNC_SRC1_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2))						break;	\
341 		case BLENDFUNC_ONE_MINUS_SRC1_COLOR:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2))			break;	\
342 		case BLENDFUNC_SRC1_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w()))							break;	\
343 		case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w()))					break;	\
344 		default:																																\
345 			DE_ASSERT(false);																													\
346 	}
347 
348 	SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
349 	SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
350 
351 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
352 #undef SAMPLE_REGISTER_BLEND_FACTOR
353 }
354 
executeBlendFactorComputeA(const Vec4 & blendColor,const BlendState & blendAState)355 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState)
356 {
357 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)								\
358 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)					\
359 	{																								\
360 		if (m_sampleRegister[regSampleNdx].isAlive)													\
361 		{																							\
362 			const Vec4& src		= m_sampleRegister[regSampleNdx].clampedBlendSrcColor;				\
363 			const Vec4& src1	= m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;				\
364 			const Vec4& dst		= m_sampleRegister[regSampleNdx].clampedBlendDstColor;				\
365 			DE_UNREF(src);																			\
366 			DE_UNREF(src1);																			\
367 			DE_UNREF(dst);																			\
368 																									\
369 			m_sampleRegister[regSampleNdx].FACTOR_NAME = clamp((FACTOR_EXPRESSION), 0.0f, 1.0f);	\
370 		}																							\
371 	}
372 
373 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME)																		\
374 	switch (blendAState.FUNC_NAME)																								\
375 	{																															\
376 		case BLENDFUNC_ZERO:						SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f)						break;	\
377 		case BLENDFUNC_ONE:							SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)						break;	\
378 		case BLENDFUNC_SRC_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())					break;	\
379 		case BLENDFUNC_ONE_MINUS_SRC_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())			break;	\
380 		case BLENDFUNC_DST_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())					break;	\
381 		case BLENDFUNC_ONE_MINUS_DST_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())			break;	\
382 		case BLENDFUNC_SRC_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())					break;	\
383 		case BLENDFUNC_ONE_MINUS_SRC_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())			break;	\
384 		case BLENDFUNC_DST_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())					break;	\
385 		case BLENDFUNC_ONE_MINUS_DST_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())			break;	\
386 		case BLENDFUNC_CONSTANT_COLOR:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())			break;	\
387 		case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())	break;	\
388 		case BLENDFUNC_CONSTANT_ALPHA:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())			break;	\
389 		case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())	break;	\
390 		case BLENDFUNC_SRC_ALPHA_SATURATE:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)						break;	\
391 		case BLENDFUNC_SRC1_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())					break;	\
392 		case BLENDFUNC_ONE_MINUS_SRC1_COLOR:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())			break;	\
393 		case BLENDFUNC_SRC1_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())					break;	\
394 		case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())			break;	\
395 		default:																												\
396 			DE_ASSERT(false);																									\
397 	}
398 
399 	SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
400 	SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
401 
402 #undef SWITCH_SRC_OR_DST_FACTOR_A
403 #undef SAMPLE_REGISTER_BLEND_FACTOR
404 }
405 
executeBlend(const BlendState & blendRGBState,const BlendState & blendAState)406 void FragmentProcessor::executeBlend (const BlendState& blendRGBState, const BlendState& blendAState)
407 {
408 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION)						\
409 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)		\
410 	{																					\
411 		if (m_sampleRegister[regSampleNdx].isAlive)										\
412 		{																				\
413 			SampleData& sample		= m_sampleRegister[regSampleNdx];					\
414 			const Vec4& srcColor	= sample.clampedBlendSrcColor;						\
415 			const Vec4& dstColor	= sample.clampedBlendDstColor;						\
416 																						\
417 			sample.COLOR_NAME = (COLOR_EXPRESSION);										\
418 		}																				\
419 	}
420 
421 	switch (blendRGBState.equation)
422 	{
423 		case BLENDEQUATION_ADD:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)	break;
424 		case BLENDEQUATION_SUBTRACT:			SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)	break;
425 		case BLENDEQUATION_REVERSE_SUBTRACT:	SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB)	break;
426 		case BLENDEQUATION_MIN:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))												break;
427 		case BLENDEQUATION_MAX:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))												break;
428 		default:
429 			DE_ASSERT(false);
430 	}
431 
432 	switch (blendAState.equation)
433 	{
434 		case BLENDEQUATION_ADD:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA)	break;
435 		case BLENDEQUATION_SUBTRACT:			SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA)	break;
436 		case BLENDEQUATION_REVERSE_SUBTRACT:	SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA)	break;
437 		case BLENDEQUATION_MIN:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w()))											break;
438 		case BLENDEQUATION_MAX:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w()))											break;
439 		default:
440 			DE_ASSERT(false);
441 	}
442 #undef SAMPLE_REGISTER_BLENDED_COLOR
443 }
444 
445 namespace advblend
446 {
447 
// Per-channel blend function: the product of src and dst.
inline float multiply (float src, float dst)
{
	return src*dst;
}
// Per-channel blend function: src + dst minus the product of src and dst.
inline float screen (float src, float dst)
{
	return src + dst - src*dst;
}
darken(float src,float dst)450 inline float	darken		(float src, float dst) { return de::min(src, dst);			}
lighten(float src,float dst)451 inline float	lighten		(float src, float dst) { return de::max(src, dst);			}
difference(float src,float dst)452 inline float	difference	(float src, float dst) { return de::abs(dst-src);			}
// Per-channel blend function: src + dst minus twice the product of src and dst.
inline float exclusion (float src, float dst)
{
	return src + dst - 2.0f*src*dst;
}
454 
// Per-channel blend function switching on the destination value:
// 2*src*dst when dst <= 0.5, otherwise 1 - 2*(1-src)*(1-dst).
inline float overlay (float src, float dst)
{
	return (dst <= 0.5f) ? (2.0f*src*dst)
						 : (1.0f - 2.0f*(1.0f-src)*(1.0f-dst));
}
462 
colordodge(float src,float dst)463 inline float colordodge (float src, float dst)
464 {
465 	if (dst <= 0.0f)
466 		return 0.0f;
467 	else if (src < 1.0f)
468 		return de::min(1.0f, dst/(1.0f-src));
469 	else
470 		return 1.0f;
471 }
472 
colorburn(float src,float dst)473 inline float colorburn (float src, float dst)
474 {
475 	if (dst >= 1.0f)
476 		return 1.0f;
477 	else if (src > 0.0f)
478 		return 1.0f - de::min(1.0f, (1.0f-dst)/src);
479 	else
480 		return 0.0f;
481 }
482 
// Per-channel blend function switching on the source value:
// 2*src*dst when src <= 0.5, otherwise 1 - 2*(1-src)*(1-dst).
inline float hardlight (float src, float dst)
{
	const bool useProduct = (src <= 0.5f);

	return useProduct ? (2.0f*src*dst)
					  : (1.0f - 2.0f*(1.0f-src)*(1.0f-dst));
}
490 
softlight(float src,float dst)491 inline float softlight (float src, float dst)
492 {
493 	if (src <= 0.5f)
494 		return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst);
495 	else if (dst <= 0.25f)
496 		return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f);
497 	else
498 		return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst);
499 }
500 
minComp(const Vec3 & v)501 inline float minComp (const Vec3& v)
502 {
503 	return de::min(de::min(v.x(), v.y()), v.z());
504 }
505 
maxComp(const Vec3 & v)506 inline float maxComp (const Vec3& v)
507 {
508 	return de::max(de::max(v.x(), v.y()), v.z());
509 }
510 
luminosity(const Vec3 & rgb)511 inline float luminosity (const Vec3& rgb)
512 {
513 	return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
514 }
515 
saturation(const Vec3 & rgb)516 inline float saturation (const Vec3& rgb)
517 {
518 	return maxComp(rgb) - minComp(rgb);
519 }
520 
// Shift cbase so that it takes on the luminosity of clum: the difference between
// the two luminosities is added to every component of cbase. If the shifted color
// has a component below 0, or otherwise a component above 1, it is rescaled around
// clum's luminosity; the divisor falls back to 1 when the luminosity equals the
// offending extreme, which avoids a division by zero.
Vec3 setLum (const Vec3& cbase, const Vec3& clum)
{
	const float		targetLum	= luminosity(clum);
	const float		lumShift	= targetLum - luminosity(cbase);
	const Vec3		shifted		= cbase + Vec3(lumShift);
	const float		lowest		= minComp(shifted);
	const float		highest		= maxComp(shifted);

	if (lowest < 0.0f)
	{
		const float divisor = (targetLum != lowest) ? (targetLum-lowest) : 1.0f;
		return targetLum + ((shifted-targetLum)*targetLum / divisor);
	}

	if (highest > 1.0f)
	{
		const float divisor = (targetLum != highest) ? (highest-targetLum) : 1.0f;
		return targetLum + ((shifted-targetLum)*(1.0f-targetLum) / divisor);
	}

	return shifted;
}
537 
// Rescales cbase so that its saturation (largest minus smallest component) equals
// that of csat, then applies the luminosity of clum using setLum().
//
// When cbase has zero saturation there is nothing to rescale, and black is used as
// the input to setLum() instead.
Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum)
{
	const float		minbase	= minComp(cbase);
	const float		sbase	= saturation(cbase);
	const float		ssat	= saturation(csat);
	Vec3			color	= Vec3(0.0f);

	// \note sbase > 0 also guarantees a non-zero divisor.
	if (sbase > 0.0f)
		color = (cbase - minbase) * ssat / sbase;

	return setLum(color, clum);
}
552 
553 } // advblend
554 
executeAdvancedBlend(BlendEquationAdvanced equation)555 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation)
556 {
557 	using namespace advblend;
558 
559 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME)											\
560 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)			\
561 	{																						\
562 		if (m_sampleRegister[regSampleNdx].isAlive)											\
563 		{																					\
564 			SampleData&	sample		= m_sampleRegister[regSampleNdx];						\
565 			const Vec4&	srcColor	= sample.clampedBlendSrcColor;							\
566 			const Vec4&	dstColor	= sample.clampedBlendDstColor;							\
567 			const Vec3&	bias		= sample.blendSrcFactorRGB;								\
568 			const float	p0			= sample.blendSrcFactorA;								\
569 			const float	r			= FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0];	\
570 			const float	g			= FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1];	\
571 			const float	b			= FUNCTION_NAME(srcColor[2], dstColor[2])*p0 + bias[2];	\
572 																							\
573 			sample.blendedRGB = Vec3(r, g, b);												\
574 		}																					\
575 	}
576 
577 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION)										\
578 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)			\
579 	{																						\
580 		if (m_sampleRegister[regSampleNdx].isAlive)											\
581 		{																					\
582 			SampleData&	sample		= m_sampleRegister[regSampleNdx];						\
583 			const Vec3	srcColor	= sample.clampedBlendSrcColor.swizzle(0,1,2);			\
584 			const Vec3	dstColor	= sample.clampedBlendDstColor.swizzle(0,1,2);			\
585 			const Vec3&	bias		= sample.blendSrcFactorRGB;								\
586 			const float	p0			= sample.blendSrcFactorA;								\
587 																							\
588 			sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias;								\
589 		}																					\
590 	}
591 
592 	// Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
593 	// \note clampedBlend*Color contains clamped & unpremultiplied colors
594 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
595 	{
596 		if (m_sampleRegister[regSampleNdx].isAlive)
597 		{
598 			SampleData&	sample		= m_sampleRegister[regSampleNdx];
599 			const Vec4&	srcColor	= sample.clampedBlendSrcColor;
600 			const Vec4&	dstColor	= sample.clampedBlendDstColor;
601 			const float	srcA		= srcColor.w();
602 			const float	dstA		= dstColor.w();
603 			const float	p0			= srcA*dstA;
604 			const float p1			= srcA*(1.0f-dstA);
605 			const float p2			= dstA*(1.0f-srcA);
606 			const Vec3	bias		(srcColor[0]*p1 + dstColor[0]*p2,
607 									 srcColor[1]*p1 + dstColor[1]*p2,
608 									 srcColor[2]*p1 + dstColor[2]*p2);
609 
610 			sample.blendSrcFactorRGB	= bias;
611 			sample.blendSrcFactorA		= p0;
612 			sample.blendedA				= p0 + p1 + p2;
613 		}
614 	}
615 
616 	switch (equation)
617 	{
618 		case BLENDEQUATION_ADVANCED_MULTIPLY:		SAMPLE_REGISTER_ADV_BLEND(multiply);									break;
619 		case BLENDEQUATION_ADVANCED_SCREEN:			SAMPLE_REGISTER_ADV_BLEND(screen);										break;
620 		case BLENDEQUATION_ADVANCED_OVERLAY:		SAMPLE_REGISTER_ADV_BLEND(overlay);										break;
621 		case BLENDEQUATION_ADVANCED_DARKEN:			SAMPLE_REGISTER_ADV_BLEND(darken);										break;
622 		case BLENDEQUATION_ADVANCED_LIGHTEN:		SAMPLE_REGISTER_ADV_BLEND(lighten);										break;
623 		case BLENDEQUATION_ADVANCED_COLORDODGE:		SAMPLE_REGISTER_ADV_BLEND(colordodge);									break;
624 		case BLENDEQUATION_ADVANCED_COLORBURN:		SAMPLE_REGISTER_ADV_BLEND(colorburn);									break;
625 		case BLENDEQUATION_ADVANCED_HARDLIGHT:		SAMPLE_REGISTER_ADV_BLEND(hardlight);									break;
626 		case BLENDEQUATION_ADVANCED_SOFTLIGHT:		SAMPLE_REGISTER_ADV_BLEND(softlight);									break;
627 		case BLENDEQUATION_ADVANCED_DIFFERENCE:		SAMPLE_REGISTER_ADV_BLEND(difference);									break;
628 		case BLENDEQUATION_ADVANCED_EXCLUSION:		SAMPLE_REGISTER_ADV_BLEND(exclusion);									break;
629 		case BLENDEQUATION_ADVANCED_HSL_HUE:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor));	break;
630 		case BLENDEQUATION_ADVANCED_HSL_SATURATION:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor));	break;
631 		case BLENDEQUATION_ADVANCED_HSL_COLOR:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor));				break;
632 		case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor));				break;
633 		default:
634 			DE_ASSERT(false);
635 	}
636 
637 #undef SAMPLE_REGISTER_ADV_BLEND
638 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
639 }
640 
executeColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,bool isSRGB,const tcu::PixelBufferAccess & colorBuffer)641 void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
642 {
643 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
644 	{
645 		if (m_sampleRegister[regSampleNdx].isAlive)
646 		{
647 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
648 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
649 			Vec4				combinedColor;
650 
651 			combinedColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
652 			combinedColor.w()	= m_sampleRegister[regSampleNdx].blendedA;
653 
654 			if (isSRGB)
655 				combinedColor = tcu::linearToSRGB(combinedColor);
656 
657 			colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
658 		}
659 	}
660 }
661 
executeRGBA8ColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::PixelBufferAccess & colorBuffer)662 void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
663 {
664 	const int		fragStride	= 4;
665 	const int		xStride		= colorBuffer.getRowPitch();
666 	const int		yStride		= colorBuffer.getSlicePitch();
667 	deUint8* const	basePtr		= (deUint8*)colorBuffer.getDataPtr();
668 
669 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
670 	{
671 		if (m_sampleRegister[regSampleNdx].isAlive)
672 		{
673 			const int			fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
674 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
675 			deUint8*			dstPtr			= basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;
676 
677 			dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
678 			dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
679 			dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
680 			dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
681 		}
682 	}
683 }
684 
executeMaskedColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const Vec4 & colorMaskFactor,const Vec4 & colorMaskNegationFactor,bool isSRGB,const tcu::PixelBufferAccess & colorBuffer)685 void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
686 {
687 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
688 	{
689 		if (m_sampleRegister[regSampleNdx].isAlive)
690 		{
691 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
692 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
693 			Vec4				originalColor	= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
694 			Vec4				newColor;
695 
696 			newColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
697 			newColor.w()	= m_sampleRegister[regSampleNdx].blendedA;
698 
699 			if (isSRGB)
700 				newColor = tcu::linearToSRGB(newColor);
701 
702 			newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;
703 
704 			colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
705 		}
706 	}
707 }
708 
executeSignedValueWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::BVec4 & colorMask,const tcu::PixelBufferAccess & colorBuffer)709 void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
710 {
711 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
712 	{
713 		if (m_sampleRegister[regSampleNdx].isAlive)
714 		{
715 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
716 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
717 			const IVec4			originalValue	= colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
718 
719 			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
720 		}
721 	}
722 }
723 
executeUnsignedValueWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::BVec4 & colorMask,const tcu::PixelBufferAccess & colorBuffer)724 void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
725 {
726 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
727 	{
728 		if (m_sampleRegister[regSampleNdx].isAlive)
729 		{
730 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
731 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
732 			const UVec4			originalValue	= colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
733 
734 			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
735 		}
736 	}
737 }
738 
render(const rr::MultisamplePixelBufferAccess & msColorBuffer,const rr::MultisamplePixelBufferAccess & msDepthBuffer,const rr::MultisamplePixelBufferAccess & msStencilBuffer,const Fragment * inputFragments,int numFragments,FaceType fragmentFacing,const FragmentOperationState & state)739 void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess&		msColorBuffer,
740 								const rr::MultisamplePixelBufferAccess&		msDepthBuffer,
741 								const rr::MultisamplePixelBufferAccess&		msStencilBuffer,
742 								const Fragment*								inputFragments,
743 								int											numFragments,
744 								FaceType									fragmentFacing,
745 								const FragmentOperationState&				state)
746 {
747 	DE_ASSERT(fragmentFacing < FACETYPE_LAST);
748 	DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
749 
750 	const tcu::PixelBufferAccess&	colorBuffer			= msColorBuffer.raw();
751 	const tcu::PixelBufferAccess&	depthBuffer			= msDepthBuffer.raw();
752 	const tcu::PixelBufferAccess&	stencilBuffer		= msStencilBuffer.raw();
753 
754 	bool							hasDepth			= depthBuffer.getWidth() > 0	&& depthBuffer.getHeight() > 0		&& depthBuffer.getDepth() > 0;
755 	bool							hasStencil			= stencilBuffer.getWidth() > 0	&& stencilBuffer.getHeight() > 0	&& stencilBuffer.getDepth() > 0;
756 	bool							doDepthTest			= hasDepth && state.depthTestEnabled;
757 	bool							doStencilTest		= hasStencil && state.stencilTestEnabled;
758 
759 	tcu::TextureChannelClass		colorbufferClass	= tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
760 	rr::GenericVecType				fragmentDataType	= (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
761 
762 	DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth())	&& (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
763 	DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight())	&& (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
764 	DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth())	&& (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
765 
766 	int						numSamplesPerFragment		= colorBuffer.getWidth();
767 	int						totalNumSamples				= numFragments*numSamplesPerFragment;
768 	int						numSampleGroups				= (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
769 	const StencilState&		stencilState				= state.stencilStates[fragmentFacing];
770 	Vec4					colorMaskFactor				(state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
771 	Vec4					colorMaskNegationFactor		(state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
772 	bool					sRGBTarget					= state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
773 
774 	DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
775 
776 	// Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
777 	// the per-sample operations for one group at a time.
778 
779 	for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
780 	{
781 		// The index of the fragment of the sample at the beginning of m_sampleRegisters.
782 		int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
783 
784 		// Initialize sample data in the sample register.
785 
786 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
787 		{
788 			int fragNdx			= groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
789 			int fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
790 
791 			if (fragNdx < numFragments)
792 			{
793 				m_sampleRegister[regSampleNdx].isAlive		= (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
794 				m_sampleRegister[regSampleNdx].depthPassed	= true; // \note This will stay true if depth test is disabled.
795 			}
796 			else
797 				m_sampleRegister[regSampleNdx].isAlive = false;
798 		}
799 
800 		// Scissor test.
801 
802 		if (state.scissorTestEnabled)
803 			executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
804 
805 		// Stencil test.
806 
807 		if (doStencilTest)
808 		{
809 			executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
810 			executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
811 		}
812 
813 		// Depth test.
814 		// \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
815 
816 		if (doDepthTest)
817 		{
818 			executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
819 
820 			if (state.depthMask)
821 				executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
822 		}
823 
824 		// Do dpFail and dpPass stencil writes.
825 
826 		if (doStencilTest)
827 			executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
828 
829 		// Kill the samples that failed depth test.
830 
831 		if (doDepthTest)
832 		{
833 			for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
834 				m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
835 		}
836 
837 		// Paint fragments to target
838 
839 		switch (fragmentDataType)
840 		{
841 			case rr::GENERICVECTYPE_FLOAT:
842 				// Blend calculation - only if using blend.
843 				if (state.blendMode == BLENDMODE_STANDARD)
844 				{
845 					// Put dst color to register, doing srgb-to-linear conversion if needed.
846 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
847 					{
848 						if (m_sampleRegister[regSampleNdx].isAlive)
849 						{
850 							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
851 							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
852 							Vec4				dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
853 
854 							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= clamp(frag.value.get<float>(), Vec4(0.0f), Vec4(1.0f));
855 							m_sampleRegister[regSampleNdx].clampedBlendSrc1Color	= clamp(frag.value1.get<float>(), Vec4(0.0f), Vec4(1.0f));
856 							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, Vec4(0.0f), Vec4(1.0f));
857 						}
858 					}
859 
860 					// Calculate blend factors to register.
861 					executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
862 					executeBlendFactorComputeA(state.blendColor, state.blendAState);
863 
864 					// Compute blended color.
865 					executeBlend(state.blendRGBState, state.blendAState);
866 				}
867 				else if (state.blendMode == BLENDMODE_ADVANCED)
868 				{
869 					// Unpremultiply colors for blending, and do sRGB->linear if necessary
870 					// \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
871 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
872 					{
873 						if (m_sampleRegister[regSampleNdx].isAlive)
874 						{
875 							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
876 							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
877 							const Vec4			srcColor		= frag.value.get<float>();
878 							const Vec4			dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
879 
880 							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= unpremultiply(clamp(srcColor, Vec4(0.0f), Vec4(1.0f)));
881 							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, Vec4(0.0f), Vec4(1.0f)));
882 						}
883 					}
884 
885 					executeAdvancedBlend(state.blendEquationAdvaced);
886 				}
887 				else
888 				{
889 					// Not using blend - just put values to register as-is.
890 					DE_ASSERT(state.blendMode == BLENDMODE_NONE);
891 
892 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
893 					{
894 						if (m_sampleRegister[regSampleNdx].isAlive)
895 						{
896 							const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
897 
898 							m_sampleRegister[regSampleNdx].blendedRGB	= frag.value.get<float>().xyz();
899 							m_sampleRegister[regSampleNdx].blendedA		= frag.value.get<float>().w();
900 						}
901 					}
902 				}
903 
904 				// Finally, write the colors to the color buffer.
905 
906 				if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
907 				{
908 					if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
909 						executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
910 					else
911 						executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
912 				}
913 				else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
914 					executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
915 				break;
916 
917 			case rr::GENERICVECTYPE_INT32:
918 				// Write fragments
919 				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
920 				{
921 					if (m_sampleRegister[regSampleNdx].isAlive)
922 					{
923 						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
924 
925 						m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
926 					}
927 				}
928 
929 				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
930 					executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
931 				break;
932 
933 			case rr::GENERICVECTYPE_UINT32:
934 				// Write fragments
935 				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
936 				{
937 					if (m_sampleRegister[regSampleNdx].isAlive)
938 					{
939 						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
940 
941 						m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
942 					}
943 				}
944 
945 				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
946 					executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
947 				break;
948 
949 			default:
950 				DE_ASSERT(DE_FALSE);
951 		}
952 	}
953 }
954 
955 } // rr
956