1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program Reference Renderer
3 * -----------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Reference implementation for per-fragment operations.
22 *//*--------------------------------------------------------------------*/
23
24 #include "rrFragmentOperations.hpp"
25 #include "tcuVectorUtil.hpp"
26 #include "tcuTextureUtil.hpp"
27
28 using tcu::IVec2;
29 using tcu::Vec3;
30 using tcu::Vec4;
31 using tcu::IVec4;
32 using tcu::UVec4;
33 using tcu::min;
34 using tcu::max;
35 using tcu::clamp;
36 using de::min;
37 using de::max;
38 using de::clamp;
39
40 namespace rr
41 {
42
43 // Return oldValue with the bits indicated by mask replaced by corresponding bits of newValue.
maskedBitReplace(int oldValue,int newValue,deUint32 mask)44 static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask)
45 {
46 return (oldValue & ~mask) | (newValue & mask);
47 }
48
isInsideRect(const IVec2 & point,const WindowRectangle & rect)49 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect)
50 {
51 return de::inBounds(point.x(), rect.left, rect.left + rect.width) &&
52 de::inBounds(point.y(), rect.bottom, rect.bottom + rect.height);
53 }
54
// Divide the RGB components of v by its alpha component; alpha itself is passed through.
// When alpha is not greater than zero the result is all zeros, and the RGB components of
// the input are asserted to be zero as well.
static inline Vec4 unpremultiply (const Vec4& v)
{
	const float alpha = v.w();

	// Written as !(alpha > 0) so that a NaN alpha takes this branch too.
	if (!(alpha > 0.0f))
	{
		DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
		return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	return Vec4(v.x()/alpha, v.y()/alpha, v.z()/alpha, alpha);
}
65
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const Vec4 & v,const WindowRectangle & r)66 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const Vec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const IVec4 & v,const WindowRectangle & r)67 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const IVec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const UVec4 & v,const WindowRectangle & r)68 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const UVec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>()); }
clearMultisampleDepthBuffer(const tcu::PixelBufferAccess & dst,float v,const WindowRectangle & r)69 void clearMultisampleDepthBuffer (const tcu::PixelBufferAccess& dst, float v, const WindowRectangle& r) { tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
clearMultisampleStencilBuffer(const tcu::PixelBufferAccess & dst,int v,const WindowRectangle & r)70 void clearMultisampleStencilBuffer (const tcu::PixelBufferAccess& dst, int v, const WindowRectangle& r) { tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
71
FragmentProcessor(void)72 FragmentProcessor::FragmentProcessor (void)
73 : m_sampleRegister()
74 {
75 }
76
executeScissorTest(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const WindowRectangle & scissorRect)77 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)
78 {
79 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
80 {
81 if (m_sampleRegister[regSampleNdx].isAlive)
82 {
83 int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment;
84
85 if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
86 m_sampleRegister[regSampleNdx].isAlive = false;
87 }
88 }
89 }
90
executeStencilCompare(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::ConstPixelBufferAccess & stencilBuffer)91 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)
92 {
93 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION) \
94 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
95 { \
96 if (m_sampleRegister[regSampleNdx].isAlive) \
97 { \
98 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
99 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
100 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
101 int maskedRef = stencilState.compMask & clampedStencilRef; \
102 int maskedBuf = stencilState.compMask & stencilBufferValue; \
103 DE_UNREF(maskedRef); \
104 DE_UNREF(maskedBuf); \
105 \
106 m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION); \
107 } \
108 }
109
110 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
111
112 switch (stencilState.func)
113 {
114 case TESTFUNC_NEVER: SAMPLE_REGISTER_STENCIL_COMPARE(false) break;
115 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_STENCIL_COMPARE(true) break;
116 case TESTFUNC_LESS: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef < maskedBuf) break;
117 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf) break;
118 case TESTFUNC_GREATER: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef > maskedBuf) break;
119 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf) break;
120 case TESTFUNC_EQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf) break;
121 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf) break;
122 default:
123 DE_ASSERT(false);
124 }
125
126 #undef SAMPLE_REGISTER_STENCIL_COMPARE
127 }
128
executeStencilSFail(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::PixelBufferAccess & stencilBuffer)129 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
130 {
131 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION) \
132 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
133 { \
134 if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed) \
135 { \
136 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
137 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
138 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
139 \
140 stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
141 m_sampleRegister[regSampleNdx].isAlive = false; \
142 } \
143 }
144
145 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
146
147 switch (stencilState.sFail)
148 {
149 case STENCILOP_KEEP: SAMPLE_REGISTER_SFAIL(stencilBufferValue) break;
150 case STENCILOP_ZERO: SAMPLE_REGISTER_SFAIL(0) break;
151 case STENCILOP_REPLACE: SAMPLE_REGISTER_SFAIL(clampedStencilRef) break;
152 case STENCILOP_INCR: SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1)) break;
153 case STENCILOP_DECR: SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1)) break;
154 case STENCILOP_INCR_WRAP: SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1)) break;
155 case STENCILOP_DECR_WRAP: SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1)) break;
156 case STENCILOP_INVERT: SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1)) break;
157 default:
158 DE_ASSERT(false);
159 }
160
161 #undef SAMPLE_REGISTER_SFAIL
162 }
163
executeDepthCompare(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,TestFunc depthFunc,const tcu::ConstPixelBufferAccess & depthBuffer)164 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)
165 {
166 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION) \
167 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
168 { \
169 if (m_sampleRegister[regSampleNdx].isAlive) \
170 { \
171 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
172 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
173 float depthBufferValue = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
174 float sampleDepthFloat = frag.sampleDepths[fragSampleNdx]; \
175 float sampleDepth = de::clamp(sampleDepthFloat, 0.0f, 1.0f); \
176 \
177 m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION); \
178 \
179 DE_UNREF(depthBufferValue); \
180 DE_UNREF(sampleDepth); \
181 } \
182 }
183
184 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION) \
185 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
186 { \
187 if (m_sampleRegister[regSampleNdx].isAlive) \
188 { \
189 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
190 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
191 deUint32 depthBufferValue = depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x(); \
192 float sampleDepthFloat = frag.sampleDepths[fragSampleNdx]; \
193 \
194 /* Convert input float to target buffer format for comparison */ \
195 \
196 deUint32 buffer[2]; \
197 \
198 DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize()); \
199 \
200 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer); \
201 access.setPixDepth(sampleDepthFloat, 0, 0, 0); \
202 deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x(); \
203 \
204 m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION); \
205 \
206 DE_UNREF(depthBufferValue); \
207 DE_UNREF(sampleDepth); \
208 } \
209 }
210
211 if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
212 {
213
214 switch (depthFunc)
215 {
216 case TESTFUNC_NEVER: SAMPLE_REGISTER_DEPTH_COMPARE_F(false) break;
217 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_DEPTH_COMPARE_F(true) break;
218 case TESTFUNC_LESS: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth < depthBufferValue) break;
219 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue) break;
220 case TESTFUNC_GREATER: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth > depthBufferValue) break;
221 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue) break;
222 case TESTFUNC_EQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue) break;
223 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue) break;
224 default:
225 DE_ASSERT(false);
226 }
227
228 }
229 else
230 {
231 switch (depthFunc)
232 {
233 case TESTFUNC_NEVER: SAMPLE_REGISTER_DEPTH_COMPARE_UI(false) break;
234 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_DEPTH_COMPARE_UI(true) break;
235 case TESTFUNC_LESS: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth < depthBufferValue) break;
236 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue) break;
237 case TESTFUNC_GREATER: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth > depthBufferValue) break;
238 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue) break;
239 case TESTFUNC_EQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue) break;
240 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue) break;
241 default:
242 DE_ASSERT(false);
243 }
244 }
245
246 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
247 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
248 }
249
executeDepthWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::PixelBufferAccess & depthBuffer)250 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)
251 {
252 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
253 {
254 if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
255 {
256 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
257 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
258 const float clampedDepth = de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
259
260 depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
261 }
262 }
263 }
264
executeStencilDpFailAndPass(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::PixelBufferAccess & stencilBuffer)265 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
266 {
267 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION) \
268 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
269 { \
270 if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION)) \
271 { \
272 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
273 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
274 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
275 \
276 stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
277 } \
278 }
279
280 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION) \
281 switch (stencilState.OP_NAME) \
282 { \
283 case STENCILOP_KEEP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue) break; \
284 case STENCILOP_ZERO: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0) break; \
285 case STENCILOP_REPLACE: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef) break; \
286 case STENCILOP_INCR: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1)) break; \
287 case STENCILOP_DECR: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1)) break; \
288 case STENCILOP_INCR_WRAP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1)) break; \
289 case STENCILOP_DECR_WRAP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1)) break; \
290 case STENCILOP_INVERT: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1)) break; \
291 default: \
292 DE_ASSERT(false); \
293 }
294
295 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
296
297 SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
298 SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
299
300 #undef SWITCH_DPFAIL_OR_DPPASS
301 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
302 }
303
executeBlendFactorComputeRGB(const Vec4 & blendColor,const BlendState & blendRGBState)304 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState)
305 {
306 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION) \
307 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
308 { \
309 if (m_sampleRegister[regSampleNdx].isAlive) \
310 { \
311 const Vec4& src = m_sampleRegister[regSampleNdx].clampedBlendSrcColor; \
312 const Vec4& src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \
313 const Vec4& dst = m_sampleRegister[regSampleNdx].clampedBlendDstColor; \
314 DE_UNREF(src); \
315 DE_UNREF(src1); \
316 DE_UNREF(dst); \
317 \
318 m_sampleRegister[regSampleNdx].FACTOR_NAME = clamp((FACTOR_EXPRESSION), Vec3(0.0f), Vec3(1.0f)); \
319 } \
320 }
321
322 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME) \
323 switch (blendRGBState.FUNC_NAME) \
324 { \
325 case BLENDFUNC_ZERO: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f)) break; \
326 case BLENDFUNC_ONE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f)) break; \
327 case BLENDFUNC_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2)) break; \
328 case BLENDFUNC_ONE_MINUS_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2)) break; \
329 case BLENDFUNC_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2)) break; \
330 case BLENDFUNC_ONE_MINUS_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2)) break; \
331 case BLENDFUNC_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w())) break; \
332 case BLENDFUNC_ONE_MINUS_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w())) break; \
333 case BLENDFUNC_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w())) break; \
334 case BLENDFUNC_ONE_MINUS_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w())) break; \
335 case BLENDFUNC_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2)) break; \
336 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2)) break; \
337 case BLENDFUNC_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w())) break; \
338 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w())) break; \
339 case BLENDFUNC_SRC_ALPHA_SATURATE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w()))) break; \
340 case BLENDFUNC_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2)) break; \
341 case BLENDFUNC_ONE_MINUS_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2)) break; \
342 case BLENDFUNC_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w())) break; \
343 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w())) break; \
344 default: \
345 DE_ASSERT(false); \
346 }
347
348 SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
349 SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
350
351 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
352 #undef SAMPLE_REGISTER_BLEND_FACTOR
353 }
354
executeBlendFactorComputeA(const Vec4 & blendColor,const BlendState & blendAState)355 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState)
356 {
357 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION) \
358 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
359 { \
360 if (m_sampleRegister[regSampleNdx].isAlive) \
361 { \
362 const Vec4& src = m_sampleRegister[regSampleNdx].clampedBlendSrcColor; \
363 const Vec4& src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \
364 const Vec4& dst = m_sampleRegister[regSampleNdx].clampedBlendDstColor; \
365 DE_UNREF(src); \
366 DE_UNREF(src1); \
367 DE_UNREF(dst); \
368 \
369 m_sampleRegister[regSampleNdx].FACTOR_NAME = clamp((FACTOR_EXPRESSION), 0.0f, 1.0f); \
370 } \
371 }
372
373 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME) \
374 switch (blendAState.FUNC_NAME) \
375 { \
376 case BLENDFUNC_ZERO: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f) break; \
377 case BLENDFUNC_ONE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break; \
378 case BLENDFUNC_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break; \
379 case BLENDFUNC_ONE_MINUS_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break; \
380 case BLENDFUNC_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break; \
381 case BLENDFUNC_ONE_MINUS_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break; \
382 case BLENDFUNC_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break; \
383 case BLENDFUNC_ONE_MINUS_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break; \
384 case BLENDFUNC_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break; \
385 case BLENDFUNC_ONE_MINUS_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break; \
386 case BLENDFUNC_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break; \
387 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \
388 case BLENDFUNC_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break; \
389 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \
390 case BLENDFUNC_SRC_ALPHA_SATURATE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break; \
391 case BLENDFUNC_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break; \
392 case BLENDFUNC_ONE_MINUS_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break; \
393 case BLENDFUNC_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break; \
394 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break; \
395 default: \
396 DE_ASSERT(false); \
397 }
398
399 SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
400 SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
401
402 #undef SWITCH_SRC_OR_DST_FACTOR_A
403 #undef SAMPLE_REGISTER_BLEND_FACTOR
404 }
405
executeBlend(const BlendState & blendRGBState,const BlendState & blendAState)406 void FragmentProcessor::executeBlend (const BlendState& blendRGBState, const BlendState& blendAState)
407 {
408 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION) \
409 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
410 { \
411 if (m_sampleRegister[regSampleNdx].isAlive) \
412 { \
413 SampleData& sample = m_sampleRegister[regSampleNdx]; \
414 const Vec4& srcColor = sample.clampedBlendSrcColor; \
415 const Vec4& dstColor = sample.clampedBlendDstColor; \
416 \
417 sample.COLOR_NAME = (COLOR_EXPRESSION); \
418 } \
419 }
420
421 switch (blendRGBState.equation)
422 {
423 case BLENDEQUATION_ADD: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB) break;
424 case BLENDEQUATION_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB) break;
425 case BLENDEQUATION_REVERSE_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB) break;
426 case BLENDEQUATION_MIN: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2))) break;
427 case BLENDEQUATION_MAX: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2))) break;
428 default:
429 DE_ASSERT(false);
430 }
431
432 switch (blendAState.equation)
433 {
434 case BLENDEQUATION_ADD: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA) break;
435 case BLENDEQUATION_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA) break;
436 case BLENDEQUATION_REVERSE_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA) break;
437 case BLENDEQUATION_MIN: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w())) break;
438 case BLENDEQUATION_MAX: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w())) break;
439 default:
440 DE_ASSERT(false);
441 }
442 #undef SAMPLE_REGISTER_BLENDED_COLOR
443 }
444
445 namespace advblend
446 {
447
multiply(float src,float dst)448 inline float multiply (float src, float dst) { return src*dst; }
screen(float src,float dst)449 inline float screen (float src, float dst) { return src + dst - src*dst; }
darken(float src,float dst)450 inline float darken (float src, float dst) { return de::min(src, dst); }
lighten(float src,float dst)451 inline float lighten (float src, float dst) { return de::max(src, dst); }
difference(float src,float dst)452 inline float difference (float src, float dst) { return de::abs(dst-src); }
exclusion(float src,float dst)453 inline float exclusion (float src, float dst) { return src + dst - 2.0f*src*dst; }
454
overlay(float src,float dst)455 inline float overlay (float src, float dst)
456 {
457 if (dst <= 0.5f)
458 return 2.0f*src*dst;
459 else
460 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
461 }
462
colordodge(float src,float dst)463 inline float colordodge (float src, float dst)
464 {
465 if (dst <= 0.0f)
466 return 0.0f;
467 else if (src < 1.0f)
468 return de::min(1.0f, dst/(1.0f-src));
469 else
470 return 1.0f;
471 }
472
colorburn(float src,float dst)473 inline float colorburn (float src, float dst)
474 {
475 if (dst >= 1.0f)
476 return 1.0f;
477 else if (src > 0.0f)
478 return 1.0f - de::min(1.0f, (1.0f-dst)/src);
479 else
480 return 0.0f;
481 }
482
hardlight(float src,float dst)483 inline float hardlight (float src, float dst)
484 {
485 if (src <= 0.5f)
486 return 2.0f*src*dst;
487 else
488 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
489 }
490
softlight(float src,float dst)491 inline float softlight (float src, float dst)
492 {
493 if (src <= 0.5f)
494 return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst);
495 else if (dst <= 0.25f)
496 return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f);
497 else
498 return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst);
499 }
500
minComp(const Vec3 & v)501 inline float minComp (const Vec3& v)
502 {
503 return de::min(de::min(v.x(), v.y()), v.z());
504 }
505
maxComp(const Vec3 & v)506 inline float maxComp (const Vec3& v)
507 {
508 return de::max(de::max(v.x(), v.y()), v.z());
509 }
510
luminosity(const Vec3 & rgb)511 inline float luminosity (const Vec3& rgb)
512 {
513 return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
514 }
515
saturation(const Vec3 & rgb)516 inline float saturation (const Vec3& rgb)
517 {
518 return maxComp(rgb) - minComp(rgb);
519 }
520
setLum(const Vec3 & cbase,const Vec3 & clum)521 Vec3 setLum (const Vec3& cbase, const Vec3& clum)
522 {
523 const float lbase = luminosity(cbase);
524 const float llum = luminosity(clum);
525 const float ldiff = llum - lbase;
526 const Vec3 color = cbase + Vec3(ldiff);
527 const float minC = minComp(color);
528 const float maxC = maxComp(color);
529
530 if (minC < 0.0f)
531 return llum + ((color-llum)*llum / (llum != minC ? (llum-minC) : 1.0f));
532 else if (maxC > 1.0f)
533 return llum + ((color-llum)*(1.0f-llum) / (llum != maxC ? (maxC-llum) : 1.0f));
534 else
535 return color;
536 }
537
setLumSat(const Vec3 & cbase,const Vec3 & csat,const Vec3 & clum)538 Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum)
539 {
540 const float minbase = minComp(cbase);
541 const float sbase = saturation(cbase);
542 const float ssat = saturation(csat);
543 Vec3 color = Vec3(0.0f);
544
545 if (sbase > 0.0f)
546 color = (cbase - minbase) * ssat / sbase;
547 else
548 color = color;
549
550 return setLum(color, clum);
551 }
552
553 } // advblend
554
executeAdvancedBlend(BlendEquationAdvanced equation)555 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation)
556 {
557 using namespace advblend;
558
559 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME) \
560 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
561 { \
562 if (m_sampleRegister[regSampleNdx].isAlive) \
563 { \
564 SampleData& sample = m_sampleRegister[regSampleNdx]; \
565 const Vec4& srcColor = sample.clampedBlendSrcColor; \
566 const Vec4& dstColor = sample.clampedBlendDstColor; \
567 const Vec3& bias = sample.blendSrcFactorRGB; \
568 const float p0 = sample.blendSrcFactorA; \
569 const float r = FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0]; \
570 const float g = FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1]; \
571 const float b = FUNCTION_NAME(srcColor[2], dstColor[2])*p0 + bias[2]; \
572 \
573 sample.blendedRGB = Vec3(r, g, b); \
574 } \
575 }
576
577 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION) \
578 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
579 { \
580 if (m_sampleRegister[regSampleNdx].isAlive) \
581 { \
582 SampleData& sample = m_sampleRegister[regSampleNdx]; \
583 const Vec3 srcColor = sample.clampedBlendSrcColor.swizzle(0,1,2); \
584 const Vec3 dstColor = sample.clampedBlendDstColor.swizzle(0,1,2); \
585 const Vec3& bias = sample.blendSrcFactorRGB; \
586 const float p0 = sample.blendSrcFactorA; \
587 \
588 sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias; \
589 } \
590 }
591
592 // Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
593 // \note clampedBlend*Color contains clamped & unpremultiplied colors
594 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
595 {
596 if (m_sampleRegister[regSampleNdx].isAlive)
597 {
598 SampleData& sample = m_sampleRegister[regSampleNdx];
599 const Vec4& srcColor = sample.clampedBlendSrcColor;
600 const Vec4& dstColor = sample.clampedBlendDstColor;
601 const float srcA = srcColor.w();
602 const float dstA = dstColor.w();
603 const float p0 = srcA*dstA;
604 const float p1 = srcA*(1.0f-dstA);
605 const float p2 = dstA*(1.0f-srcA);
606 const Vec3 bias (srcColor[0]*p1 + dstColor[0]*p2,
607 srcColor[1]*p1 + dstColor[1]*p2,
608 srcColor[2]*p1 + dstColor[2]*p2);
609
610 sample.blendSrcFactorRGB = bias;
611 sample.blendSrcFactorA = p0;
612 sample.blendedA = p0 + p1 + p2;
613 }
614 }
615
616 switch (equation)
617 {
618 case BLENDEQUATION_ADVANCED_MULTIPLY: SAMPLE_REGISTER_ADV_BLEND(multiply); break;
619 case BLENDEQUATION_ADVANCED_SCREEN: SAMPLE_REGISTER_ADV_BLEND(screen); break;
620 case BLENDEQUATION_ADVANCED_OVERLAY: SAMPLE_REGISTER_ADV_BLEND(overlay); break;
621 case BLENDEQUATION_ADVANCED_DARKEN: SAMPLE_REGISTER_ADV_BLEND(darken); break;
622 case BLENDEQUATION_ADVANCED_LIGHTEN: SAMPLE_REGISTER_ADV_BLEND(lighten); break;
623 case BLENDEQUATION_ADVANCED_COLORDODGE: SAMPLE_REGISTER_ADV_BLEND(colordodge); break;
624 case BLENDEQUATION_ADVANCED_COLORBURN: SAMPLE_REGISTER_ADV_BLEND(colorburn); break;
625 case BLENDEQUATION_ADVANCED_HARDLIGHT: SAMPLE_REGISTER_ADV_BLEND(hardlight); break;
626 case BLENDEQUATION_ADVANCED_SOFTLIGHT: SAMPLE_REGISTER_ADV_BLEND(softlight); break;
627 case BLENDEQUATION_ADVANCED_DIFFERENCE: SAMPLE_REGISTER_ADV_BLEND(difference); break;
628 case BLENDEQUATION_ADVANCED_EXCLUSION: SAMPLE_REGISTER_ADV_BLEND(exclusion); break;
629 case BLENDEQUATION_ADVANCED_HSL_HUE: SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor)); break;
630 case BLENDEQUATION_ADVANCED_HSL_SATURATION: SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor)); break;
631 case BLENDEQUATION_ADVANCED_HSL_COLOR: SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor)); break;
632 case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY: SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor)); break;
633 default:
634 DE_ASSERT(false);
635 }
636
637 #undef SAMPLE_REGISTER_ADV_BLEND
638 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
639 }
640
executeColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,bool isSRGB,const tcu::PixelBufferAccess & colorBuffer)641 void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
642 {
643 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
644 {
645 if (m_sampleRegister[regSampleNdx].isAlive)
646 {
647 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
648 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
649 Vec4 combinedColor;
650
651 combinedColor.xyz() = m_sampleRegister[regSampleNdx].blendedRGB;
652 combinedColor.w() = m_sampleRegister[regSampleNdx].blendedA;
653
654 if (isSRGB)
655 combinedColor = tcu::linearToSRGB(combinedColor);
656
657 colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
658 }
659 }
660 }
661
executeRGBA8ColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::PixelBufferAccess & colorBuffer)662 void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
663 {
664 const int fragStride = 4;
665 const int xStride = colorBuffer.getRowPitch();
666 const int yStride = colorBuffer.getSlicePitch();
667 deUint8* const basePtr = (deUint8*)colorBuffer.getDataPtr();
668
669 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
670 {
671 if (m_sampleRegister[regSampleNdx].isAlive)
672 {
673 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
674 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
675 deUint8* dstPtr = basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;
676
677 dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
678 dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
679 dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
680 dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
681 }
682 }
683 }
684
executeMaskedColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const Vec4 & colorMaskFactor,const Vec4 & colorMaskNegationFactor,bool isSRGB,const tcu::PixelBufferAccess & colorBuffer)685 void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
686 {
687 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
688 {
689 if (m_sampleRegister[regSampleNdx].isAlive)
690 {
691 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
692 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
693 Vec4 originalColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
694 Vec4 newColor;
695
696 newColor.xyz() = m_sampleRegister[regSampleNdx].blendedRGB;
697 newColor.w() = m_sampleRegister[regSampleNdx].blendedA;
698
699 if (isSRGB)
700 newColor = tcu::linearToSRGB(newColor);
701
702 newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;
703
704 colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
705 }
706 }
707 }
708
executeSignedValueWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::BVec4 & colorMask,const tcu::PixelBufferAccess & colorBuffer)709 void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
710 {
711 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
712 {
713 if (m_sampleRegister[regSampleNdx].isAlive)
714 {
715 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
716 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
717 const IVec4 originalValue = colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
718
719 colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
720 }
721 }
722 }
723
executeUnsignedValueWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::BVec4 & colorMask,const tcu::PixelBufferAccess & colorBuffer)724 void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
725 {
726 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
727 {
728 if (m_sampleRegister[regSampleNdx].isAlive)
729 {
730 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
731 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
732 const UVec4 originalValue = colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
733
734 colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
735 }
736 }
737 }
738
render(const rr::MultisamplePixelBufferAccess & msColorBuffer,const rr::MultisamplePixelBufferAccess & msDepthBuffer,const rr::MultisamplePixelBufferAccess & msStencilBuffer,const Fragment * inputFragments,int numFragments,FaceType fragmentFacing,const FragmentOperationState & state)739 void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess& msColorBuffer,
740 const rr::MultisamplePixelBufferAccess& msDepthBuffer,
741 const rr::MultisamplePixelBufferAccess& msStencilBuffer,
742 const Fragment* inputFragments,
743 int numFragments,
744 FaceType fragmentFacing,
745 const FragmentOperationState& state)
746 {
747 DE_ASSERT(fragmentFacing < FACETYPE_LAST);
748 DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
749
750 const tcu::PixelBufferAccess& colorBuffer = msColorBuffer.raw();
751 const tcu::PixelBufferAccess& depthBuffer = msDepthBuffer.raw();
752 const tcu::PixelBufferAccess& stencilBuffer = msStencilBuffer.raw();
753
754 bool hasDepth = depthBuffer.getWidth() > 0 && depthBuffer.getHeight() > 0 && depthBuffer.getDepth() > 0;
755 bool hasStencil = stencilBuffer.getWidth() > 0 && stencilBuffer.getHeight() > 0 && stencilBuffer.getDepth() > 0;
756 bool doDepthTest = hasDepth && state.depthTestEnabled;
757 bool doStencilTest = hasStencil && state.stencilTestEnabled;
758
759 tcu::TextureChannelClass colorbufferClass = tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
760 rr::GenericVecType fragmentDataType = (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
761
762 DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth()) && (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
763 DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight()) && (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
764 DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth()) && (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
765
766 int numSamplesPerFragment = colorBuffer.getWidth();
767 int totalNumSamples = numFragments*numSamplesPerFragment;
768 int numSampleGroups = (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
769 const StencilState& stencilState = state.stencilStates[fragmentFacing];
770 Vec4 colorMaskFactor (state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
771 Vec4 colorMaskNegationFactor (state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
772 bool sRGBTarget = state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
773
774 DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
775
776 // Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
777 // the per-sample operations for one group at a time.
778
779 for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
780 {
781 // The index of the fragment of the sample at the beginning of m_sampleRegisters.
782 int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
783
784 // Initialize sample data in the sample register.
785
786 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
787 {
788 int fragNdx = groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
789 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
790
791 if (fragNdx < numFragments)
792 {
793 m_sampleRegister[regSampleNdx].isAlive = (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
794 m_sampleRegister[regSampleNdx].depthPassed = true; // \note This will stay true if depth test is disabled.
795 }
796 else
797 m_sampleRegister[regSampleNdx].isAlive = false;
798 }
799
800 // Scissor test.
801
802 if (state.scissorTestEnabled)
803 executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
804
805 // Stencil test.
806
807 if (doStencilTest)
808 {
809 executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
810 executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
811 }
812
813 // Depth test.
814 // \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
815
816 if (doDepthTest)
817 {
818 executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
819
820 if (state.depthMask)
821 executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
822 }
823
824 // Do dpFail and dpPass stencil writes.
825
826 if (doStencilTest)
827 executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
828
829 // Kill the samples that failed depth test.
830
831 if (doDepthTest)
832 {
833 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
834 m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
835 }
836
837 // Paint fragments to target
838
839 switch (fragmentDataType)
840 {
841 case rr::GENERICVECTYPE_FLOAT:
842 // Blend calculation - only if using blend.
843 if (state.blendMode == BLENDMODE_STANDARD)
844 {
845 // Put dst color to register, doing srgb-to-linear conversion if needed.
846 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
847 {
848 if (m_sampleRegister[regSampleNdx].isAlive)
849 {
850 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
851 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
852 Vec4 dstColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
853
854 m_sampleRegister[regSampleNdx].clampedBlendSrcColor = clamp(frag.value.get<float>(), Vec4(0.0f), Vec4(1.0f));
855 m_sampleRegister[regSampleNdx].clampedBlendSrc1Color = clamp(frag.value1.get<float>(), Vec4(0.0f), Vec4(1.0f));
856 m_sampleRegister[regSampleNdx].clampedBlendDstColor = clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, Vec4(0.0f), Vec4(1.0f));
857 }
858 }
859
860 // Calculate blend factors to register.
861 executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
862 executeBlendFactorComputeA(state.blendColor, state.blendAState);
863
864 // Compute blended color.
865 executeBlend(state.blendRGBState, state.blendAState);
866 }
867 else if (state.blendMode == BLENDMODE_ADVANCED)
868 {
869 // Unpremultiply colors for blending, and do sRGB->linear if necessary
870 // \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
871 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
872 {
873 if (m_sampleRegister[regSampleNdx].isAlive)
874 {
875 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
876 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
877 const Vec4 srcColor = frag.value.get<float>();
878 const Vec4 dstColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
879
880 m_sampleRegister[regSampleNdx].clampedBlendSrcColor = unpremultiply(clamp(srcColor, Vec4(0.0f), Vec4(1.0f)));
881 m_sampleRegister[regSampleNdx].clampedBlendDstColor = unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, Vec4(0.0f), Vec4(1.0f)));
882 }
883 }
884
885 executeAdvancedBlend(state.blendEquationAdvaced);
886 }
887 else
888 {
889 // Not using blend - just put values to register as-is.
890 DE_ASSERT(state.blendMode == BLENDMODE_NONE);
891
892 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
893 {
894 if (m_sampleRegister[regSampleNdx].isAlive)
895 {
896 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
897
898 m_sampleRegister[regSampleNdx].blendedRGB = frag.value.get<float>().xyz();
899 m_sampleRegister[regSampleNdx].blendedA = frag.value.get<float>().w();
900 }
901 }
902 }
903
904 // Finally, write the colors to the color buffer.
905
906 if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
907 {
908 if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
909 executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
910 else
911 executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
912 }
913 else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
914 executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
915 break;
916
917 case rr::GENERICVECTYPE_INT32:
918 // Write fragments
919 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
920 {
921 if (m_sampleRegister[regSampleNdx].isAlive)
922 {
923 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
924
925 m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
926 }
927 }
928
929 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
930 executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
931 break;
932
933 case rr::GENERICVECTYPE_UINT32:
934 // Write fragments
935 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
936 {
937 if (m_sampleRegister[regSampleNdx].isAlive)
938 {
939 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
940
941 m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
942 }
943 }
944
945 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
946 executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
947 break;
948
949 default:
950 DE_ASSERT(DE_FALSE);
951 }
952 }
953 }
954
955 } // rr
956