1 /****************************************************************************
2  * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file blend.cpp
24  *
25  * @brief Implementation for blending operations.
26  *
27  ******************************************************************************/
28 #include "state.h"
29 
30 template <bool Color, bool Alpha>
GenerateBlendFactor(SWR_BLEND_FACTOR func,simdvector & constantColor,simdvector & src,simdvector & src1,simdvector & dst,simdvector & out)31 INLINE void GenerateBlendFactor(SWR_BLEND_FACTOR func,
32                                 simdvector&      constantColor,
33                                 simdvector&      src,
34                                 simdvector&      src1,
35                                 simdvector&      dst,
36                                 simdvector&      out)
37 {
38     simdvector result;
39 
40     switch (func)
41     {
42     case BLENDFACTOR_ZERO:
43         result.x = _simd_setzero_ps();
44         result.y = _simd_setzero_ps();
45         result.z = _simd_setzero_ps();
46         result.w = _simd_setzero_ps();
47         break;
48 
49     case BLENDFACTOR_ONE:
50         result.x = _simd_set1_ps(1.0);
51         result.y = _simd_set1_ps(1.0);
52         result.z = _simd_set1_ps(1.0);
53         result.w = _simd_set1_ps(1.0);
54         break;
55 
56     case BLENDFACTOR_SRC_COLOR:
57         result = src;
58         break;
59 
60     case BLENDFACTOR_DST_COLOR:
61         result = dst;
62         break;
63 
64     case BLENDFACTOR_INV_SRC_COLOR:
65         result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x);
66         result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y);
67         result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z);
68         result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
69         break;
70 
71     case BLENDFACTOR_INV_DST_COLOR:
72         result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x);
73         result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y);
74         result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z);
75         result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
76         break;
77 
78     case BLENDFACTOR_SRC_ALPHA:
79         result.x = src.w;
80         result.y = src.w;
81         result.z = src.w;
82         result.w = src.w;
83         break;
84 
85     case BLENDFACTOR_INV_SRC_ALPHA:
86     {
87         simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
88         result.x                = oneMinusSrcA;
89         result.y                = oneMinusSrcA;
90         result.z                = oneMinusSrcA;
91         result.w                = oneMinusSrcA;
92         break;
93     }
94 
95     case BLENDFACTOR_DST_ALPHA:
96         result.x = dst.w;
97         result.y = dst.w;
98         result.z = dst.w;
99         result.w = dst.w;
100         break;
101 
102     case BLENDFACTOR_INV_DST_ALPHA:
103     {
104         simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
105         result.x                = oneMinusDstA;
106         result.y                = oneMinusDstA;
107         result.z                = oneMinusDstA;
108         result.w                = oneMinusDstA;
109         break;
110     }
111 
112     case BLENDFACTOR_SRC_ALPHA_SATURATE:
113     {
114         simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w));
115         result.x       = sat;
116         result.y       = sat;
117         result.z       = sat;
118         result.w       = _simd_set1_ps(1.0);
119         break;
120     }
121 
122     case BLENDFACTOR_CONST_COLOR:
123         result.x = constantColor[0];
124         result.y = constantColor[1];
125         result.z = constantColor[2];
126         result.w = constantColor[3];
127         break;
128 
129     case BLENDFACTOR_CONST_ALPHA:
130         result.x = result.y = result.z = result.w = constantColor[3];
131         break;
132 
133     case BLENDFACTOR_INV_CONST_COLOR:
134     {
135         result.x = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[0]);
136         result.y = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[1]);
137         result.z = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[2]);
138         result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
139         break;
140     }
141 
142     case BLENDFACTOR_INV_CONST_ALPHA:
143     {
144         result.x = result.y = result.z = result.w =
145             _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
146         break;
147     }
148 
149     case BLENDFACTOR_SRC1_COLOR:
150         result.x = src1.x;
151         result.y = src1.y;
152         result.z = src1.z;
153         result.w = src1.w;
154         break;
155 
156     case BLENDFACTOR_SRC1_ALPHA:
157         result.x = result.y = result.z = result.w = src1.w;
158         break;
159 
160     case BLENDFACTOR_INV_SRC1_COLOR:
161         result.x = _simd_sub_ps(_simd_set1_ps(1.0f), src1.x);
162         result.y = _simd_sub_ps(_simd_set1_ps(1.0f), src1.y);
163         result.z = _simd_sub_ps(_simd_set1_ps(1.0f), src1.z);
164         result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
165         break;
166 
167     case BLENDFACTOR_INV_SRC1_ALPHA:
168         result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
169         break;
170 
171     default:
172         SWR_INVALID("Unimplemented blend factor: %d", func);
173     }
174 
175     if (Color)
176     {
177         out.x = result.x;
178         out.y = result.y;
179         out.z = result.z;
180     }
181     if (Alpha)
182     {
183         out.w = result.w;
184     }
185 }
186 
187 template <bool Color, bool Alpha>
BlendFunc(SWR_BLEND_OP blendOp,simdvector & src,simdvector & srcFactor,simdvector & dst,simdvector & dstFactor,simdvector & out)188 INLINE void BlendFunc(SWR_BLEND_OP blendOp,
189                       simdvector&  src,
190                       simdvector&  srcFactor,
191                       simdvector&  dst,
192                       simdvector&  dstFactor,
193                       simdvector&  out)
194 {
195     simdvector result;
196 
197     switch (blendOp)
198     {
199     case BLENDOP_ADD:
200         result.x = _simd_fmadd_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
201         result.y = _simd_fmadd_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
202         result.z = _simd_fmadd_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
203         result.w = _simd_fmadd_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
204         break;
205 
206     case BLENDOP_SUBTRACT:
207         result.x = _simd_fmsub_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
208         result.y = _simd_fmsub_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
209         result.z = _simd_fmsub_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
210         result.w = _simd_fmsub_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
211         break;
212 
213     case BLENDOP_REVSUBTRACT:
214         result.x = _simd_fmsub_ps(dstFactor.x, dst.x, _simd_mul_ps(srcFactor.x, src.x));
215         result.y = _simd_fmsub_ps(dstFactor.y, dst.y, _simd_mul_ps(srcFactor.y, src.y));
216         result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z));
217         result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w));
218         break;
219 
220     case BLENDOP_MIN:
221         result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
222         result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
223         result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
224         result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
225         break;
226 
227     case BLENDOP_MAX:
228         result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
229         result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
230         result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
231         result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
232         break;
233 
234     default:
235         SWR_INVALID("Unimplemented blend function: %d", blendOp);
236     }
237 
238     if (Color)
239     {
240         out.x = result.x;
241         out.y = result.y;
242         out.z = result.z;
243     }
244     if (Alpha)
245     {
246         out.w = result.w;
247     }
248 }
249 
250 template <SWR_TYPE type>
Clamp(simdvector & src)251 INLINE void Clamp(simdvector& src)
252 {
253     switch (type)
254     {
255     case SWR_TYPE_FLOAT:
256         break;
257 
258     case SWR_TYPE_UNORM:
259         src.x = _simd_max_ps(src.x, _simd_setzero_ps());
260         src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
261 
262         src.y = _simd_max_ps(src.y, _simd_setzero_ps());
263         src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
264 
265         src.z = _simd_max_ps(src.z, _simd_setzero_ps());
266         src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
267 
268         src.w = _simd_max_ps(src.w, _simd_setzero_ps());
269         src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
270         break;
271 
272     case SWR_TYPE_SNORM:
273         src.x = _simd_max_ps(src.x, _simd_set1_ps(-1.0f));
274         src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
275 
276         src.y = _simd_max_ps(src.y, _simd_set1_ps(-1.0f));
277         src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
278 
279         src.z = _simd_max_ps(src.z, _simd_set1_ps(-1.0f));
280         src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
281 
282         src.w = _simd_max_ps(src.w, _simd_set1_ps(-1.0f));
283         src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
284         break;
285 
286     default:
287         SWR_INVALID("Unimplemented clamp: %d", type);
288         break;
289     }
290 }
291 
292 template <SWR_TYPE type>
Blend(const SWR_BLEND_STATE * pBlendState,const SWR_RENDER_TARGET_BLEND_STATE * pState,simdvector & src,simdvector & src1,uint8_t * pDst,simdvector & result)293 void Blend(const SWR_BLEND_STATE*               pBlendState,
294            const SWR_RENDER_TARGET_BLEND_STATE* pState,
295            simdvector&                          src,
296            simdvector&                          src1,
297            uint8_t*                             pDst,
298            simdvector&                          result)
299 {
300     // load render target
301     simdvector dst;
302     LoadSOA<KNOB_COLOR_HOT_TILE_FORMAT>(pDst, dst);
303 
304     simdvector constColor;
305     constColor.x = _simd_broadcast_ss(&pBlendState->constantColor[0]);
306     constColor.y = _simd_broadcast_ss(&pBlendState->constantColor[1]);
307     constColor.z = _simd_broadcast_ss(&pBlendState->constantColor[2]);
308     constColor.w = _simd_broadcast_ss(&pBlendState->constantColor[3]);
309 
310     // clamp src/dst/constant
311     Clamp<type>(src);
312     Clamp<type>(src1);
313     Clamp<type>(dst);
314     Clamp<type>(constColor);
315 
316     simdvector srcFactor, dstFactor;
317     if (pBlendState->independentAlphaBlendEnable)
318     {
319         GenerateBlendFactor<true, false>(
320             (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
321         GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor,
322                                          constColor,
323                                          src,
324                                          src1,
325                                          dst,
326                                          srcFactor);
327 
328         GenerateBlendFactor<true, false>(
329             (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
330         GenerateBlendFactor<false, true>(
331             (SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
332 
333         BlendFunc<true, false>(
334             (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
335         BlendFunc<false, true>(
336             (SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
337     }
338     else
339     {
340         GenerateBlendFactor<true, true>(
341             (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
342         GenerateBlendFactor<true, true>(
343             (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
344 
345         BlendFunc<true, true>(
346             (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
347     }
348 }
349