1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file blend.cpp
24 *
25 * @brief Implementation for blending operations.
26 *
27 ******************************************************************************/
28 #include "state.h"
29
30 template <bool Color, bool Alpha>
GenerateBlendFactor(SWR_BLEND_FACTOR func,simdvector & constantColor,simdvector & src,simdvector & src1,simdvector & dst,simdvector & out)31 INLINE void GenerateBlendFactor(SWR_BLEND_FACTOR func,
32 simdvector& constantColor,
33 simdvector& src,
34 simdvector& src1,
35 simdvector& dst,
36 simdvector& out)
37 {
38 simdvector result;
39
40 switch (func)
41 {
42 case BLENDFACTOR_ZERO:
43 result.x = _simd_setzero_ps();
44 result.y = _simd_setzero_ps();
45 result.z = _simd_setzero_ps();
46 result.w = _simd_setzero_ps();
47 break;
48
49 case BLENDFACTOR_ONE:
50 result.x = _simd_set1_ps(1.0);
51 result.y = _simd_set1_ps(1.0);
52 result.z = _simd_set1_ps(1.0);
53 result.w = _simd_set1_ps(1.0);
54 break;
55
56 case BLENDFACTOR_SRC_COLOR:
57 result = src;
58 break;
59
60 case BLENDFACTOR_DST_COLOR:
61 result = dst;
62 break;
63
64 case BLENDFACTOR_INV_SRC_COLOR:
65 result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x);
66 result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y);
67 result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z);
68 result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
69 break;
70
71 case BLENDFACTOR_INV_DST_COLOR:
72 result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x);
73 result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y);
74 result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z);
75 result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
76 break;
77
78 case BLENDFACTOR_SRC_ALPHA:
79 result.x = src.w;
80 result.y = src.w;
81 result.z = src.w;
82 result.w = src.w;
83 break;
84
85 case BLENDFACTOR_INV_SRC_ALPHA:
86 {
87 simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
88 result.x = oneMinusSrcA;
89 result.y = oneMinusSrcA;
90 result.z = oneMinusSrcA;
91 result.w = oneMinusSrcA;
92 break;
93 }
94
95 case BLENDFACTOR_DST_ALPHA:
96 result.x = dst.w;
97 result.y = dst.w;
98 result.z = dst.w;
99 result.w = dst.w;
100 break;
101
102 case BLENDFACTOR_INV_DST_ALPHA:
103 {
104 simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
105 result.x = oneMinusDstA;
106 result.y = oneMinusDstA;
107 result.z = oneMinusDstA;
108 result.w = oneMinusDstA;
109 break;
110 }
111
112 case BLENDFACTOR_SRC_ALPHA_SATURATE:
113 {
114 simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w));
115 result.x = sat;
116 result.y = sat;
117 result.z = sat;
118 result.w = _simd_set1_ps(1.0);
119 break;
120 }
121
122 case BLENDFACTOR_CONST_COLOR:
123 result.x = constantColor[0];
124 result.y = constantColor[1];
125 result.z = constantColor[2];
126 result.w = constantColor[3];
127 break;
128
129 case BLENDFACTOR_CONST_ALPHA:
130 result.x = result.y = result.z = result.w = constantColor[3];
131 break;
132
133 case BLENDFACTOR_INV_CONST_COLOR:
134 {
135 result.x = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[0]);
136 result.y = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[1]);
137 result.z = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[2]);
138 result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
139 break;
140 }
141
142 case BLENDFACTOR_INV_CONST_ALPHA:
143 {
144 result.x = result.y = result.z = result.w =
145 _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
146 break;
147 }
148
149 case BLENDFACTOR_SRC1_COLOR:
150 result.x = src1.x;
151 result.y = src1.y;
152 result.z = src1.z;
153 result.w = src1.w;
154 break;
155
156 case BLENDFACTOR_SRC1_ALPHA:
157 result.x = result.y = result.z = result.w = src1.w;
158 break;
159
160 case BLENDFACTOR_INV_SRC1_COLOR:
161 result.x = _simd_sub_ps(_simd_set1_ps(1.0f), src1.x);
162 result.y = _simd_sub_ps(_simd_set1_ps(1.0f), src1.y);
163 result.z = _simd_sub_ps(_simd_set1_ps(1.0f), src1.z);
164 result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
165 break;
166
167 case BLENDFACTOR_INV_SRC1_ALPHA:
168 result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
169 break;
170
171 default:
172 SWR_INVALID("Unimplemented blend factor: %d", func);
173 }
174
175 if (Color)
176 {
177 out.x = result.x;
178 out.y = result.y;
179 out.z = result.z;
180 }
181 if (Alpha)
182 {
183 out.w = result.w;
184 }
185 }
186
187 template <bool Color, bool Alpha>
BlendFunc(SWR_BLEND_OP blendOp,simdvector & src,simdvector & srcFactor,simdvector & dst,simdvector & dstFactor,simdvector & out)188 INLINE void BlendFunc(SWR_BLEND_OP blendOp,
189 simdvector& src,
190 simdvector& srcFactor,
191 simdvector& dst,
192 simdvector& dstFactor,
193 simdvector& out)
194 {
195 simdvector result;
196
197 switch (blendOp)
198 {
199 case BLENDOP_ADD:
200 result.x = _simd_fmadd_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
201 result.y = _simd_fmadd_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
202 result.z = _simd_fmadd_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
203 result.w = _simd_fmadd_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
204 break;
205
206 case BLENDOP_SUBTRACT:
207 result.x = _simd_fmsub_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
208 result.y = _simd_fmsub_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
209 result.z = _simd_fmsub_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
210 result.w = _simd_fmsub_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
211 break;
212
213 case BLENDOP_REVSUBTRACT:
214 result.x = _simd_fmsub_ps(dstFactor.x, dst.x, _simd_mul_ps(srcFactor.x, src.x));
215 result.y = _simd_fmsub_ps(dstFactor.y, dst.y, _simd_mul_ps(srcFactor.y, src.y));
216 result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z));
217 result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w));
218 break;
219
220 case BLENDOP_MIN:
221 result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
222 result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
223 result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
224 result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
225 break;
226
227 case BLENDOP_MAX:
228 result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
229 result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
230 result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
231 result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
232 break;
233
234 default:
235 SWR_INVALID("Unimplemented blend function: %d", blendOp);
236 }
237
238 if (Color)
239 {
240 out.x = result.x;
241 out.y = result.y;
242 out.z = result.z;
243 }
244 if (Alpha)
245 {
246 out.w = result.w;
247 }
248 }
249
250 template <SWR_TYPE type>
Clamp(simdvector & src)251 INLINE void Clamp(simdvector& src)
252 {
253 switch (type)
254 {
255 case SWR_TYPE_FLOAT:
256 break;
257
258 case SWR_TYPE_UNORM:
259 src.x = _simd_max_ps(src.x, _simd_setzero_ps());
260 src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
261
262 src.y = _simd_max_ps(src.y, _simd_setzero_ps());
263 src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
264
265 src.z = _simd_max_ps(src.z, _simd_setzero_ps());
266 src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
267
268 src.w = _simd_max_ps(src.w, _simd_setzero_ps());
269 src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
270 break;
271
272 case SWR_TYPE_SNORM:
273 src.x = _simd_max_ps(src.x, _simd_set1_ps(-1.0f));
274 src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
275
276 src.y = _simd_max_ps(src.y, _simd_set1_ps(-1.0f));
277 src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
278
279 src.z = _simd_max_ps(src.z, _simd_set1_ps(-1.0f));
280 src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
281
282 src.w = _simd_max_ps(src.w, _simd_set1_ps(-1.0f));
283 src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
284 break;
285
286 default:
287 SWR_INVALID("Unimplemented clamp: %d", type);
288 break;
289 }
290 }
291
292 template <SWR_TYPE type>
Blend(const SWR_BLEND_STATE * pBlendState,const SWR_RENDER_TARGET_BLEND_STATE * pState,simdvector & src,simdvector & src1,uint8_t * pDst,simdvector & result)293 void Blend(const SWR_BLEND_STATE* pBlendState,
294 const SWR_RENDER_TARGET_BLEND_STATE* pState,
295 simdvector& src,
296 simdvector& src1,
297 uint8_t* pDst,
298 simdvector& result)
299 {
300 // load render target
301 simdvector dst;
302 LoadSOA<KNOB_COLOR_HOT_TILE_FORMAT>(pDst, dst);
303
304 simdvector constColor;
305 constColor.x = _simd_broadcast_ss(&pBlendState->constantColor[0]);
306 constColor.y = _simd_broadcast_ss(&pBlendState->constantColor[1]);
307 constColor.z = _simd_broadcast_ss(&pBlendState->constantColor[2]);
308 constColor.w = _simd_broadcast_ss(&pBlendState->constantColor[3]);
309
310 // clamp src/dst/constant
311 Clamp<type>(src);
312 Clamp<type>(src1);
313 Clamp<type>(dst);
314 Clamp<type>(constColor);
315
316 simdvector srcFactor, dstFactor;
317 if (pBlendState->independentAlphaBlendEnable)
318 {
319 GenerateBlendFactor<true, false>(
320 (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
321 GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor,
322 constColor,
323 src,
324 src1,
325 dst,
326 srcFactor);
327
328 GenerateBlendFactor<true, false>(
329 (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
330 GenerateBlendFactor<false, true>(
331 (SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
332
333 BlendFunc<true, false>(
334 (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
335 BlendFunc<false, true>(
336 (SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
337 }
338 else
339 {
340 GenerateBlendFactor<true, true>(
341 (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
342 GenerateBlendFactor<true, true>(
343 (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
344
345 BlendFunc<true, true>(
346 (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
347 }
348 }
349