/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file blend.cpp
*
* @brief Implementation for blending operations.
*
******************************************************************************/
28 #include "state.h"
29 
30 template<bool Color, bool Alpha>
31 INLINE
GenerateBlendFactor(SWR_BLEND_FACTOR func,simdvector & constantColor,simdvector & src,simdvector & src1,simdvector & dst,simdvector & out)32 void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdvector &src, simdvector &src1, simdvector &dst, simdvector &out)
33 {
34     simdvector result;
35 
36     switch (func)
37     {
38     case BLENDFACTOR_ZERO:
39         result.x = _simd_setzero_ps();
40         result.y = _simd_setzero_ps();
41         result.z = _simd_setzero_ps();
42         result.w = _simd_setzero_ps();
43         break;
44 
45     case BLENDFACTOR_ONE:
46         result.x = _simd_set1_ps(1.0);
47         result.y = _simd_set1_ps(1.0);
48         result.z = _simd_set1_ps(1.0);
49         result.w = _simd_set1_ps(1.0);
50         break;
51 
52     case BLENDFACTOR_SRC_COLOR:
53         result = src;
54         break;
55 
56     case BLENDFACTOR_DST_COLOR:
57         result = dst;
58         break;
59 
60     case BLENDFACTOR_INV_SRC_COLOR:
61         result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x);
62         result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y);
63         result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z);
64         result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
65         break;
66 
67     case BLENDFACTOR_INV_DST_COLOR:
68         result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x);
69         result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y);
70         result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z);
71         result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
72         break;
73 
74     case BLENDFACTOR_SRC_ALPHA: result.x = src.w;
75         result.y = src.w;
76         result.z = src.w;
77         result.w = src.w;
78         break;
79 
80     case BLENDFACTOR_INV_SRC_ALPHA:
81     {
82         simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
83         result.x = oneMinusSrcA;
84         result.y = oneMinusSrcA;
85         result.z = oneMinusSrcA;
86         result.w = oneMinusSrcA;
87         break;
88     }
89 
90     case BLENDFACTOR_DST_ALPHA: result.x = dst.w;
91         result.y = dst.w;
92         result.z = dst.w;
93         result.w = dst.w;
94         break;
95 
96     case BLENDFACTOR_INV_DST_ALPHA:
97     {
98         simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
99         result.x = oneMinusDstA;
100         result.y = oneMinusDstA;
101         result.z = oneMinusDstA;
102         result.w = oneMinusDstA;
103         break;
104     }
105 
106     case BLENDFACTOR_SRC_ALPHA_SATURATE:
107     {
108         simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w));
109         result.x = sat;
110         result.y = sat;
111         result.z = sat;
112         result.w = _simd_set1_ps(1.0);
113         break;
114     }
115 
116     case BLENDFACTOR_CONST_COLOR:
117         result.x = constantColor[0];
118         result.y = constantColor[1];
119         result.z = constantColor[2];
120         result.w = constantColor[3];
121         break;
122 
123     case BLENDFACTOR_CONST_ALPHA:
124         result.x = result.y = result.z = result.w = constantColor[3];
125         break;
126 
127     case BLENDFACTOR_INV_CONST_COLOR:
128     {
129         result.x = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[0]);
130         result.y = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[1]);
131         result.z = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[2]);
132         result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
133         break;
134     }
135 
136     case BLENDFACTOR_INV_CONST_ALPHA:
137     {
138         result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
139         break;
140     }
141 
142     case BLENDFACTOR_SRC1_COLOR:
143         result.x = src1.x;
144         result.y = src1.y;
145         result.z = src1.z;
146         result.w = src1.w;
147         break;
148 
149     case BLENDFACTOR_SRC1_ALPHA:
150         result.x = result.y = result.z = result.w = src1.w;
151         break;
152 
153     case BLENDFACTOR_INV_SRC1_COLOR:
154         result.x = _simd_sub_ps(_simd_set1_ps(1.0f), src1.x);
155         result.y = _simd_sub_ps(_simd_set1_ps(1.0f), src1.y);
156         result.z = _simd_sub_ps(_simd_set1_ps(1.0f), src1.z);
157         result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
158         break;
159 
160     case BLENDFACTOR_INV_SRC1_ALPHA:
161         result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
162         break;
163 
164     default: SWR_INVALID("Unimplemented blend factor: %d", func);
165     }
166 
167     if (Color)
168     {
169         out.x = result.x;
170         out.y = result.y;
171         out.z = result.z;
172     }
173     if (Alpha)
174     {
175         out.w = result.w;
176     }
177 
178 }
179 
180 template<bool Color, bool Alpha>
BlendFunc(SWR_BLEND_OP blendOp,simdvector & src,simdvector & srcFactor,simdvector & dst,simdvector & dstFactor,simdvector & out)181 INLINE void BlendFunc(SWR_BLEND_OP blendOp, simdvector &src, simdvector &srcFactor, simdvector &dst, simdvector &dstFactor, simdvector &out)
182 {
183     simdvector result;
184 
185     switch (blendOp)
186     {
187     case BLENDOP_ADD:
188         result.x = _simd_fmadd_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
189         result.y = _simd_fmadd_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
190         result.z = _simd_fmadd_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
191         result.w = _simd_fmadd_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
192         break;
193 
194     case BLENDOP_SUBTRACT:
195         result.x = _simd_fmsub_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
196         result.y = _simd_fmsub_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
197         result.z = _simd_fmsub_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
198         result.w = _simd_fmsub_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
199         break;
200 
201     case BLENDOP_REVSUBTRACT:
202         result.x = _simd_fmsub_ps(dstFactor.x, dst.x, _simd_mul_ps(srcFactor.x, src.x));
203         result.y = _simd_fmsub_ps(dstFactor.y, dst.y, _simd_mul_ps(srcFactor.y, src.y));
204         result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z));
205         result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w));
206         break;
207 
208     case BLENDOP_MIN:
209         result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
210         result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
211         result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
212         result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
213         break;
214 
215     case BLENDOP_MAX:
216         result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
217         result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
218         result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
219         result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
220         break;
221 
222     default:
223         SWR_INVALID("Unimplemented blend function: %d", blendOp);
224     }
225 
226     if (Color)
227     {
228         out.x = result.x;
229         out.y = result.y;
230         out.z = result.z;
231     }
232     if (Alpha)
233     {
234         out.w = result.w;
235     }
236 }
237 
238 template<SWR_TYPE type>
Clamp(simdvector & src)239 INLINE void Clamp(simdvector &src)
240 {
241     switch (type)
242     {
243     case SWR_TYPE_FLOAT:
244         break;
245 
246     case SWR_TYPE_UNORM:
247         src.x = _simd_max_ps(src.x, _simd_setzero_ps());
248         src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
249 
250         src.y = _simd_max_ps(src.y, _simd_setzero_ps());
251         src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
252 
253         src.z = _simd_max_ps(src.z, _simd_setzero_ps());
254         src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
255 
256         src.w = _simd_max_ps(src.w, _simd_setzero_ps());
257         src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
258         break;
259 
260     case SWR_TYPE_SNORM:
261         src.x = _simd_max_ps(src.x, _simd_set1_ps(-1.0f));
262         src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
263 
264         src.y = _simd_max_ps(src.y, _simd_set1_ps(-1.0f));
265         src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
266 
267         src.z = _simd_max_ps(src.z, _simd_set1_ps(-1.0f));
268         src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
269 
270         src.w = _simd_max_ps(src.w, _simd_set1_ps(-1.0f));
271         src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
272         break;
273 
274     default:
275         SWR_INVALID("Unimplemented clamp: %d", type);
276         break;
277     }
278 }
279 
280 template<SWR_TYPE type>
Blend(const SWR_BLEND_STATE * pBlendState,const SWR_RENDER_TARGET_BLEND_STATE * pState,simdvector & src,simdvector & src1,uint8_t * pDst,simdvector & result)281 void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STATE *pState, simdvector &src, simdvector& src1, uint8_t *pDst, simdvector &result)
282 {
283     // load render target
284     simdvector dst;
285     LoadSOA<KNOB_COLOR_HOT_TILE_FORMAT>(pDst, dst);
286 
287     simdvector constColor;
288     constColor.x = _simd_broadcast_ss(&pBlendState->constantColor[0]);
289     constColor.y = _simd_broadcast_ss(&pBlendState->constantColor[1]);
290     constColor.z = _simd_broadcast_ss(&pBlendState->constantColor[2]);
291     constColor.w = _simd_broadcast_ss(&pBlendState->constantColor[3]);
292 
293     // clamp src/dst/constant
294     Clamp<type>(src);
295     Clamp<type>(src1);
296     Clamp<type>(dst);
297     Clamp<type>(constColor);
298 
299     simdvector srcFactor, dstFactor;
300     if (pBlendState->independentAlphaBlendEnable)
301     {
302         GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
303         GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor, constColor, src, src1, dst, srcFactor);
304 
305         GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
306         GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
307 
308         BlendFunc<true, false>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
309         BlendFunc<false, true>((SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
310     }
311     else
312     {
313         GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
314         GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
315 
316         BlendFunc<true, true>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
317     }
318 }
319