1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "SkPM4fPriv.h"
9 #include "SkUtils.h"
10 #include "SkXfermode.h"
11 
// Selects which pair of 32-bit <-> Sk4f conversions the templated blitters below use for
// the destination pixels (see load_dst / store_dst).
enum DstType {
    kLinear_Dst = 0,    // dst goes through Sk4f_fromL32 / Sk4f_toL32
    kSRGB_Dst   = 1,    // dst goes through Sk4f_fromS32 / Sk4f_toS32
};
16 
// Multiplies every lane of x4 by coverage/255.
static Sk4f scale_by_coverage(const Sk4f& x4, uint8_t coverage) {
    const float unitCoverage = coverage * (1/255.0f);
    return x4 * Sk4f(unitCoverage);
}
20 
// Linear interpolation from dst toward src, weighted by srcCoverage/255:
// coverage 0 yields dst, coverage 255 yields src.
static Sk4f lerp(const Sk4f& src, const Sk4f& dst, uint8_t srcCoverage) {
    const Sk4f weight(srcCoverage * (1/255.0f));
    return dst + (src - dst) * weight;
}
24 
load_dst(SkPMColor dstC)25 template <DstType D> Sk4f load_dst(SkPMColor dstC) {
26     return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC);
27 }
28 
srgb_4b_to_linear_unit(SkPMColor dstC)29 static Sk4f srgb_4b_to_linear_unit(SkPMColor dstC) {
30     return Sk4f_fromS32(dstC);
31 }
32 
store_dst(const Sk4f & x4)33 template <DstType D> uint32_t store_dst(const Sk4f& x4) {
34     return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4);
35 }
36 
// Runs l4 through linear_to_srgb, scales the result by 255 and adds 0.5 so that the
// float -> byte conversion done by the callers rounds to nearest.
static Sk4f linear_unit_to_srgb_255f(const Sk4f& l4) {
    const Sk4f scale(255);
    const Sk4f roundingBias(0.5f);
    return linear_to_srgb(l4) * scale + roundingBias;
}
40 
41 ///////////////////////////////////////////////////////////////////////////////////////////////////
42 
general_1(const SkXfermode * xfer,uint32_t dst[],const SkPM4f * src,int count,const SkAlpha aa[])43 template <DstType D> void general_1(const SkXfermode* xfer, uint32_t dst[],
44                                     const SkPM4f* src, int count, const SkAlpha aa[]) {
45     SkXfermodeProc4f proc = xfer->getProc4f();
46     SkPM4f d;
47     if (aa) {
48         for (int i = 0; i < count; ++i) {
49             Sk4f d4 = load_dst<D>(dst[i]);
50             d4.store(d.fVec);
51             Sk4f r4 = Sk4f::Load(proc(*src, d).fVec);
52             dst[i] = store_dst<D>(lerp(r4, d4, aa[i]));
53         }
54     } else {
55         for (int i = 0; i < count; ++i) {
56             load_dst<D>(dst[i]).store(d.fVec);
57             Sk4f r4 = Sk4f::Load(proc(*src, d).fVec);
58             dst[i] = store_dst<D>(r4);
59         }
60     }
61 }
62 
general_n(const SkXfermode * xfer,uint32_t dst[],const SkPM4f src[],int count,const SkAlpha aa[])63 template <DstType D> void general_n(const SkXfermode* xfer, uint32_t dst[],
64                                     const SkPM4f src[], int count, const SkAlpha aa[]) {
65     SkXfermodeProc4f proc = xfer->getProc4f();
66     SkPM4f d;
67     if (aa) {
68         for (int i = 0; i < count; ++i) {
69             Sk4f d4 = load_dst<D>(dst[i]);
70             d4.store(d.fVec);
71             Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec);
72             dst[i] = store_dst<D>(lerp(r4, d4, aa[i]));
73         }
74     } else {
75         for (int i = 0; i < count; ++i) {
76             load_dst<D>(dst[i]).store(d.fVec);
77             Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec);
78             dst[i] = store_dst<D>(r4);
79         }
80     }
81 }
82 
83 const SkXfermode::D32Proc gProcs_General[] = {
84     general_n<kLinear_Dst>, general_n<kLinear_Dst>,
85     general_1<kLinear_Dst>, general_1<kLinear_Dst>,
86     general_n<kSRGB_Dst>,   general_n<kSRGB_Dst>,
87     general_1<kSRGB_Dst>,   general_1<kSRGB_Dst>,
88 };
89 
90 ///////////////////////////////////////////////////////////////////////////////////////////////////
91 
clear_linear(const SkXfermode *,uint32_t dst[],const SkPM4f[],int count,const SkAlpha aa[])92 static void clear_linear(const SkXfermode*, uint32_t dst[], const SkPM4f[],
93                            int count, const SkAlpha aa[]) {
94     if (aa) {
95         for (int i = 0; i < count; ++i) {
96             unsigned a = aa[i];
97             if (a) {
98                 SkPMColor dstC = dst[i];
99                 SkPMColor C = 0;
100                 if (0xFF != a) {
101                     C = SkFourByteInterp(C, dstC, a);
102                 }
103                 dst[i] = C;
104             }
105         }
106     } else {
107         sk_memset32(dst, 0, count);
108     }
109 }
110 
clear_srgb(const SkXfermode *,uint32_t dst[],const SkPM4f[],int count,const SkAlpha aa[])111 static void clear_srgb(const SkXfermode*, uint32_t dst[], const SkPM4f[],
112                        int count, const SkAlpha aa[]) {
113     if (aa) {
114         for (int i = 0; i < count; ++i) {
115             if (aa[i]) {
116                 Sk4f d = Sk4f_fromS32(dst[i]) * Sk4f((255 - aa[i]) * (1/255.0f));
117                 dst[i] = Sk4f_toS32(d);
118             }
119         }
120     } else {
121         sk_memset32(dst, 0, count);
122     }
123 }
124 
125 const SkXfermode::D32Proc gProcs_Clear[] = {
126     clear_linear,   clear_linear,
127     clear_linear,   clear_linear,
128     clear_srgb,     clear_srgb,
129     clear_srgb,     clear_srgb,
130 };
131 
132 ///////////////////////////////////////////////////////////////////////////////////////////////////
133 
src_n(const SkXfermode *,uint32_t dst[],const SkPM4f src[],int count,const SkAlpha aa[])134 template <DstType D> void src_n(const SkXfermode*, uint32_t dst[],
135                                 const SkPM4f src[], int count, const SkAlpha aa[]) {
136     for (int i = 0; i < count; ++i) {
137         unsigned a = 0xFF;
138         if (aa) {
139             a = aa[i];
140             if (0 == a) {
141                 continue;
142             }
143         }
144         Sk4f r4 = Sk4f::Load(src[i].fVec);   // src always overrides dst
145         if (a != 0xFF) {
146             Sk4f d4 = load_dst<D>(dst[i]);
147             r4 = lerp(r4, d4, a);
148         }
149         dst[i] = store_dst<D>(r4);
150     }
151 }
152 
// Per-lane interpolation from dst toward src: a src_scale lane of 0 yields dst,
// a lane of 1 yields src.
static Sk4f lerp(const Sk4f& src, const Sk4f& dst, const Sk4f& src_scale) {
    const Sk4f delta = src - dst;
    return dst + delta * src_scale;
}
156 
src_1(const SkXfermode *,uint32_t dst[],const SkPM4f * src,int count,const SkAlpha aa[])157 template <DstType D> void src_1(const SkXfermode*, uint32_t dst[],
158                                 const SkPM4f* src, int count, const SkAlpha aa[]) {
159     const Sk4f s4 = Sk4f::Load(src->fVec);
160 
161     if (aa) {
162         if (D == kLinear_Dst) {
163             // operate in bias-255 space for src and dst
164             const Sk4f& s4_255 = s4 * Sk4f(255);
165             while (count >= 4) {
166                 Sk4f aa4 = SkNx_cast<float>(Sk4b::Load(aa)) * Sk4f(1/255.f);
167                 Sk4f r0 = lerp(s4_255, to_4f(dst[0]), Sk4f(aa4[0])) + Sk4f(0.5f);
168                 Sk4f r1 = lerp(s4_255, to_4f(dst[1]), Sk4f(aa4[1])) + Sk4f(0.5f);
169                 Sk4f r2 = lerp(s4_255, to_4f(dst[2]), Sk4f(aa4[2])) + Sk4f(0.5f);
170                 Sk4f r3 = lerp(s4_255, to_4f(dst[3]), Sk4f(aa4[3])) + Sk4f(0.5f);
171                 Sk4f_ToBytes((uint8_t*)dst, r0, r1, r2, r3);
172 
173                 dst += 4;
174                 aa += 4;
175                 count -= 4;
176             }
177         } else {    // kSRGB
178             while (count >= 4) {
179                 Sk4f aa4 = SkNx_cast<float>(Sk4b::Load(aa)) * Sk4f(1/255.0f);
180 
181                 /*  If we ever natively support convert 255_linear -> 255_srgb, then perhaps
182                  *  it would be faster (and possibly allow more code sharing with kLinear) to
183                  *  stay in that space.
184                  */
185                 Sk4f r0 = lerp(s4, load_dst<D>(dst[0]), Sk4f(aa4[0]));
186                 Sk4f r1 = lerp(s4, load_dst<D>(dst[1]), Sk4f(aa4[1]));
187                 Sk4f r2 = lerp(s4, load_dst<D>(dst[2]), Sk4f(aa4[2]));
188                 Sk4f r3 = lerp(s4, load_dst<D>(dst[3]), Sk4f(aa4[3]));
189                 Sk4f_ToBytes((uint8_t*)dst,
190                              linear_unit_to_srgb_255f(r0),
191                              linear_unit_to_srgb_255f(r1),
192                              linear_unit_to_srgb_255f(r2),
193                              linear_unit_to_srgb_255f(r3));
194 
195                 dst += 4;
196                 aa += 4;
197                 count -= 4;
198             }
199         }
200         for (int i = 0; i < count; ++i) {
201             unsigned a = aa[i];
202             Sk4f d4 = load_dst<D>(dst[i]);
203             dst[i] = store_dst<D>(lerp(s4, d4, a));
204         }
205     } else {
206         sk_memset32(dst, store_dst<D>(s4), count);
207     }
208 }
209 
210 const SkXfermode::D32Proc gProcs_Src[] = {
211     src_n<kLinear_Dst>, src_n<kLinear_Dst>,
212     src_1<kLinear_Dst>, src_1<kLinear_Dst>,
213     src_n<kSRGB_Dst>,   src_n<kSRGB_Dst>,
214     src_1<kSRGB_Dst>,   src_1<kSRGB_Dst>,
215 };
216 
217 ///////////////////////////////////////////////////////////////////////////////////////////////////
218 
// Dst mode: the destination is left exactly as it is, so there is nothing to do.
static void dst(const SkXfermode*, uint32_t dst[], const SkPM4f[], int count, const SkAlpha aa[]) {
    // intentionally empty
}
220 
221 const SkXfermode::D32Proc gProcs_Dst[] = {
222     dst, dst, dst, dst, dst, dst, dst, dst,
223 };
224 
225 ///////////////////////////////////////////////////////////////////////////////////////////////////
226 
srcover_n(const SkXfermode *,uint32_t dst[],const SkPM4f src[],int count,const SkAlpha aa[])227 template <DstType D> void srcover_n(const SkXfermode*, uint32_t dst[],
228                                     const SkPM4f src[], int count, const SkAlpha aa[]) {
229     if (aa) {
230         for (int i = 0; i < count; ++i) {
231             unsigned a = aa[i];
232             if (0 == a) {
233                 continue;
234             }
235             Sk4f s4 = Sk4f::Load(src[i].fVec);
236             Sk4f d4 = load_dst<D>(dst[i]);
237             if (a != 0xFF) {
238                 s4 = scale_by_coverage(s4, a);
239             }
240             Sk4f r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));
241             dst[i] = store_dst<D>(r4);
242         }
243     } else {
244         for (int i = 0; i < count; ++i) {
245             Sk4f s4 = Sk4f::Load(src[i].fVec);
246             Sk4f d4 = load_dst<D>(dst[i]);
247             Sk4f r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));
248             dst[i] = store_dst<D>(r4);
249         }
250     }
251 }
252 
srcover_linear_dst_1(const SkXfermode *,uint32_t dst[],const SkPM4f * src,int count,const SkAlpha aa[])253 static void srcover_linear_dst_1(const SkXfermode*, uint32_t dst[],
254                                  const SkPM4f* src, int count, const SkAlpha aa[]) {
255     const Sk4f s4 = Sk4f::Load(src->fVec);
256     const Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
257 
258     if (aa) {
259         for (int i = 0; i < count; ++i) {
260             unsigned a = aa[i];
261             if (0 == a) {
262                 continue;
263             }
264             Sk4f d4 = Sk4f_fromL32(dst[i]);
265             Sk4f r4;
266             if (a != 0xFF) {
267                 Sk4f s4_aa = scale_by_coverage(s4, a);
268                 r4 = s4_aa + d4 * Sk4f(1 - get_alpha(s4_aa));
269             } else {
270                 r4 = s4 + d4 * dst_scale;
271             }
272             dst[i] = Sk4f_toL32(r4);
273         }
274     } else {
275         const Sk4f s4_255 = s4 * Sk4f(255) + Sk4f(0.5f);   // +0.5 to pre-bias for rounding
276         while (count >= 4) {
277             Sk4f d0 = to_4f(dst[0]);
278             Sk4f d1 = to_4f(dst[1]);
279             Sk4f d2 = to_4f(dst[2]);
280             Sk4f d3 = to_4f(dst[3]);
281             Sk4f_ToBytes((uint8_t*)dst,
282                          s4_255 + d0 * dst_scale,
283                          s4_255 + d1 * dst_scale,
284                          s4_255 + d2 * dst_scale,
285                          s4_255 + d3 * dst_scale);
286             dst += 4;
287             count -= 4;
288         }
289         for (int i = 0; i < count; ++i) {
290             Sk4f d4 = to_4f(dst[i]);
291             dst[i] = to_4b(s4_255 + d4 * dst_scale);
292         }
293     }
294 }
295 
srcover_srgb_dst_1(const SkXfermode *,uint32_t dst[],const SkPM4f * src,int count,const SkAlpha aa[])296 static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[],
297                                const SkPM4f* src, int count, const SkAlpha aa[]) {
298     Sk4f s4 = Sk4f::Load(src->fVec);
299     Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
300 
301     if (aa) {
302         for (int i = 0; i < count; ++i) {
303             unsigned a = aa[i];
304             if (0 == a) {
305                 continue;
306             }
307             Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
308             Sk4f r4;
309             if (a != 0xFF) {
310                 const Sk4f s4_aa = scale_by_coverage(s4, a);
311                 r4 = s4_aa + d4 * Sk4f(1 - get_alpha(s4_aa));
312             } else {
313                 r4 = s4 + d4 * dst_scale;
314             }
315             dst[i] = to_4b(linear_unit_to_srgb_255f(r4));
316         }
317     } else {
318         while (count >= 4) {
319             Sk4f d0 = srgb_4b_to_linear_unit(dst[0]);
320             Sk4f d1 = srgb_4b_to_linear_unit(dst[1]);
321             Sk4f d2 = srgb_4b_to_linear_unit(dst[2]);
322             Sk4f d3 = srgb_4b_to_linear_unit(dst[3]);
323             Sk4f_ToBytes((uint8_t*)dst,
324                          linear_unit_to_srgb_255f(s4 + d0 * dst_scale),
325                          linear_unit_to_srgb_255f(s4 + d1 * dst_scale),
326                          linear_unit_to_srgb_255f(s4 + d2 * dst_scale),
327                          linear_unit_to_srgb_255f(s4 + d3 * dst_scale));
328             dst += 4;
329             count -= 4;
330         }
331         for (int i = 0; i < count; ++i) {
332             Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
333             dst[i] = to_4b(linear_unit_to_srgb_255f(s4 + d4 * dst_scale));
334         }
335     }
336 }
337 
338 const SkXfermode::D32Proc gProcs_SrcOver[] = {
339     srcover_n<kLinear_Dst>, src_n<kLinear_Dst>,
340     srcover_linear_dst_1,   src_1<kLinear_Dst>,
341 
342     srcover_n<kSRGB_Dst>,   src_n<kSRGB_Dst>,
343     srcover_srgb_dst_1,     src_1<kSRGB_Dst>,
344 };
345 
346 ///////////////////////////////////////////////////////////////////////////////////////////////////
347 
find_proc(SkXfermode::Mode mode,uint32_t flags)348 static SkXfermode::D32Proc find_proc(SkXfermode::Mode mode, uint32_t flags) {
349     SkASSERT(0 == (flags & ~7));
350     flags &= 7;
351 
352     switch (mode) {
353         case SkXfermode::kClear_Mode:   return gProcs_Clear[flags];
354         case SkXfermode::kSrc_Mode:     return gProcs_Src[flags];
355         case SkXfermode::kDst_Mode:     return gProcs_Dst[flags];
356         case SkXfermode::kSrcOver_Mode: return gProcs_SrcOver[flags];
357         default:
358             break;
359     }
360     return gProcs_General[flags];
361 }
362 
onGetD32Proc(uint32_t flags) const363 SkXfermode::D32Proc SkXfermode::onGetD32Proc(uint32_t flags) const {
364     SkASSERT(0 == (flags & ~7));
365     flags &= 7;
366 
367     Mode mode;
368     return this->asMode(&mode) ? find_proc(mode, flags) : gProcs_General[flags];
369 }
370 
GetD32Proc(SkXfermode * xfer,uint32_t flags)371 SkXfermode::D32Proc SkXfermode::GetD32Proc(SkXfermode* xfer, uint32_t flags) {
372     return xfer ? xfer->onGetD32Proc(flags) : find_proc(SkXfermode::kSrcOver_Mode, flags);
373 }
374 
375 ///////////////////////////////////////////////////////////////////////////////////////////////////
376 #include "SkColorPriv.h"
377 
lcd16_to_unit_4f(uint16_t rgb)378 static Sk4f lcd16_to_unit_4f(uint16_t rgb) {
379 #ifdef SK_PMCOLOR_IS_RGBA
380     Sk4i rgbi = Sk4i(SkGetPackedR16(rgb), SkGetPackedG16(rgb), SkGetPackedB16(rgb), 0);
381 #else
382     Sk4i rgbi = Sk4i(SkGetPackedB16(rgb), SkGetPackedG16(rgb), SkGetPackedR16(rgb), 0);
383 #endif
384     return SkNx_cast<float>(rgbi) * Sk4f(1.0f/31, 1.0f/63, 1.0f/31, 0);
385 }
386 
387 template <DstType D>
src_1_lcd(uint32_t dst[],const SkPM4f * src,int count,const uint16_t lcd[])388 void src_1_lcd(uint32_t dst[], const SkPM4f* src, int count, const uint16_t lcd[]) {
389     const Sk4f s4 = Sk4f::Load(src->fVec);
390 
391     if (D == kLinear_Dst) {
392         // operate in bias-255 space for src and dst
393         const Sk4f s4bias = s4 * Sk4f(255);
394         for (int i = 0; i < count; ++i) {
395             uint16_t rgb = lcd[i];
396             if (0 == rgb) {
397                 continue;
398             }
399             Sk4f d4bias = to_4f(dst[i]);
400             dst[i] = to_4b(lerp(s4bias, d4bias, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
401         }
402     } else {    // kSRGB
403         for (int i = 0; i < count; ++i) {
404             uint16_t rgb = lcd[i];
405             if (0 == rgb) {
406                 continue;
407             }
408             Sk4f d4 = load_dst<D>(dst[i]);
409             dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
410         }
411     }
412 }
413 
414 template <DstType D>
src_n_lcd(uint32_t dst[],const SkPM4f src[],int count,const uint16_t lcd[])415 void src_n_lcd(uint32_t dst[], const SkPM4f src[], int count, const uint16_t lcd[]) {
416     for (int i = 0; i < count; ++i) {
417         uint16_t rgb = lcd[i];
418         if (0 == rgb) {
419             continue;
420         }
421         Sk4f s4 = Sk4f::Load(src[i].fVec);
422         Sk4f d4 = load_dst<D>(dst[i]);
423         dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
424     }
425 }
426 
427 template <DstType D>
srcover_1_lcd(uint32_t dst[],const SkPM4f * src,int count,const uint16_t lcd[])428 void srcover_1_lcd(uint32_t dst[], const SkPM4f* src, int count, const uint16_t lcd[]) {
429     const Sk4f s4 = Sk4f::Load(src->fVec);
430     Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
431 
432     for (int i = 0; i < count; ++i) {
433         uint16_t rgb = lcd[i];
434         if (0 == rgb) {
435             continue;
436         }
437         Sk4f d4 = load_dst<D>(dst[i]);
438         Sk4f r4 = s4 + d4 * dst_scale;
439         r4 = lerp(r4, d4, lcd16_to_unit_4f(rgb));
440         dst[i] = store_dst<D>(r4) | (SK_A32_MASK << SK_A32_SHIFT);
441     }
442 }
443 
444 template <DstType D>
srcover_n_lcd(uint32_t dst[],const SkPM4f src[],int count,const uint16_t lcd[])445 void srcover_n_lcd(uint32_t dst[], const SkPM4f src[], int count, const uint16_t lcd[]) {
446     for (int i = 0; i < count; ++i) {
447         uint16_t rgb = lcd[i];
448         if (0 == rgb) {
449             continue;
450         }
451         Sk4f s4 = Sk4f::Load(src[i].fVec);
452         Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
453         Sk4f d4 = load_dst<D>(dst[i]);
454         Sk4f r4 = s4 + d4 * dst_scale;
455         r4 = lerp(r4, d4, lcd16_to_unit_4f(rgb));
456         dst[i] = store_dst<D>(r4) | (SK_A32_MASK << SK_A32_SHIFT);
457     }
458 }
459 
GetLCD32Proc(uint32_t flags)460 SkXfermode::LCD32Proc SkXfermode::GetLCD32Proc(uint32_t flags) {
461     SkASSERT((flags & ~7) == 0);
462     flags &= 7;
463 
464     const LCD32Proc procs[] = {
465         srcover_n_lcd<kSRGB_Dst>,   src_n_lcd<kSRGB_Dst>,
466         srcover_1_lcd<kSRGB_Dst>,   src_1_lcd<kSRGB_Dst>,
467 
468         srcover_n_lcd<kLinear_Dst>, src_n_lcd<kLinear_Dst>,
469         srcover_1_lcd<kLinear_Dst>, src_1_lcd<kLinear_Dst>,
470     };
471     return procs[flags];
472 }
473