1 /*
2  * Copyright 2006 The Android Open Source Project
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef SkColorPriv_DEFINED
9 #define SkColorPriv_DEFINED
10 
// Turn this on for extra debug checking when blending onto 565.
// It is switched on automatically whenever SK_DEBUG is defined.
#ifdef SK_DEBUG
    #define CHECK_FOR_565_OVERFLOW
#endif
15 
16 #include "SkColor.h"
17 #include "SkMath.h"
18 
19 //////////////////////////////////////////////////////////////////////////////
20 
// Asserts (via SkASSERT) that x has no bits set outside its low 8 bits.
#define SkASSERT_IS_BYTE(x)     SkASSERT(0 == ((x) & ~0xFF))
22 
/*
 *  Skia's 32bit backend only supports 1 swizzle order at a time (compile-time).
 *  This is specified by 4 defines SK_A32_SHIFT, SK_R32_SHIFT, ... for G and B.
 *
 *  For easier compatibility with Skia's GPU backend, we further restrict these
 *  to either (in memory-byte-order) RGBA or BGRA. Note that this "order" does
 *  not directly correspond to the same shift-order, since we have to take endianness
 *  into account.
 *
 *  Here we enforce this constraint.
 */

// Reference shift tables for the two permitted layouts. The SK_RGBA_* values
// put R first and the SK_BGRA_* values put B first; which end of the word
// "first" maps to (shift 24 vs shift 0) depends on SK_CPU_BENDIAN.
#ifdef SK_CPU_BENDIAN
    #define SK_RGBA_R32_SHIFT   24
    #define SK_RGBA_G32_SHIFT   16
    #define SK_RGBA_B32_SHIFT   8
    #define SK_RGBA_A32_SHIFT   0

    #define SK_BGRA_B32_SHIFT   24
    #define SK_BGRA_G32_SHIFT   16
    #define SK_BGRA_R32_SHIFT   8
    #define SK_BGRA_A32_SHIFT   0
#else
    #define SK_RGBA_R32_SHIFT   0
    #define SK_RGBA_G32_SHIFT   8
    #define SK_RGBA_B32_SHIFT   16
    #define SK_RGBA_A32_SHIFT   24

    #define SK_BGRA_B32_SHIFT   0
    #define SK_BGRA_G32_SHIFT   8
    #define SK_BGRA_R32_SHIFT   16
    #define SK_BGRA_A32_SHIFT   24
#endif
56 
// The two layout selectors are mutually exclusive.
#if defined(SK_PMCOLOR_IS_RGBA) && defined(SK_PMCOLOR_IS_BGRA)
    #error "can't define PMCOLOR to be RGBA and BGRA"
#endif

// True when all four configured SK_*32_SHIFT values equal the RGBA reference table.
#define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA  \
    (SK_A32_SHIFT == SK_RGBA_A32_SHIFT &&    \
     SK_R32_SHIFT == SK_RGBA_R32_SHIFT &&    \
     SK_G32_SHIFT == SK_RGBA_G32_SHIFT &&    \
     SK_B32_SHIFT == SK_RGBA_B32_SHIFT)

// True when all four configured SK_*32_SHIFT values equal the BGRA reference table.
#define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA  \
    (SK_A32_SHIFT == SK_BGRA_A32_SHIFT &&    \
     SK_R32_SHIFT == SK_BGRA_R32_SHIFT &&    \
     SK_G32_SHIFT == SK_BGRA_G32_SHIFT &&    \
     SK_B32_SHIFT == SK_BGRA_B32_SHIFT)


// Each channel's shift divided by 8, i.e. the shift expressed in whole bytes.
#define SK_A_INDEX  (SK_A32_SHIFT/8)
#define SK_R_INDEX  (SK_R32_SHIFT/8)
#define SK_G_INDEX  (SK_G32_SHIFT/8)
#define SK_B_INDEX  (SK_B32_SHIFT/8)

// An explicitly chosen layout must agree with the configured shifts.
#if defined(SK_PMCOLOR_IS_RGBA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
    #error "SK_PMCOLOR_IS_RGBA does not match SK_*32_SHIFT values"
#endif

#if defined(SK_PMCOLOR_IS_BGRA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
    #error "SK_PMCOLOR_IS_BGRA does not match SK_*32_SHIFT values"
#endif

#if !defined(SK_PMCOLOR_IS_RGBA) && !defined(SK_PMCOLOR_IS_BGRA)
    // deduce which to define from the _SHIFT defines

    #if LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
        #define SK_PMCOLOR_IS_RGBA
    #elif LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
        #define SK_PMCOLOR_IS_BGRA
    #else
        // Any other shift combination is rejected at compile time.
        #error "need 32bit packing to be either RGBA or BGRA"
    #endif
#endif

// hide these now that we're done
#undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
#undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
102 
103 //////////////////////////////////////////////////////////////////////////////
104 
105 // Reverse the bytes coorsponding to RED and BLUE in a packed pixels. Note the
106 // pair of them are in the same 2 slots in both RGBA and BGRA, thus there is
107 // no need to pass in the colortype to this function.
SkSwizzle_RB(uint32_t c)108 static inline uint32_t SkSwizzle_RB(uint32_t c) {
109     static const uint32_t kRBMask = (0xFF << SK_R32_SHIFT) | (0xFF << SK_B32_SHIFT);
110 
111     unsigned c0 = (c >> SK_R32_SHIFT) & 0xFF;
112     unsigned c1 = (c >> SK_B32_SHIFT) & 0xFF;
113     return (c & ~kRBMask) | (c0 << SK_B32_SHIFT) | (c1 << SK_R32_SHIFT);
114 }
115 
SkPackARGB_as_RGBA(U8CPU a,U8CPU r,U8CPU g,U8CPU b)116 static inline uint32_t SkPackARGB_as_RGBA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
117     SkASSERT_IS_BYTE(a);
118     SkASSERT_IS_BYTE(r);
119     SkASSERT_IS_BYTE(g);
120     SkASSERT_IS_BYTE(b);
121     return (a << SK_RGBA_A32_SHIFT) | (r << SK_RGBA_R32_SHIFT) |
122            (g << SK_RGBA_G32_SHIFT) | (b << SK_RGBA_B32_SHIFT);
123 }
124 
SkPackARGB_as_BGRA(U8CPU a,U8CPU r,U8CPU g,U8CPU b)125 static inline uint32_t SkPackARGB_as_BGRA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
126     SkASSERT_IS_BYTE(a);
127     SkASSERT_IS_BYTE(r);
128     SkASSERT_IS_BYTE(g);
129     SkASSERT_IS_BYTE(b);
130     return (a << SK_BGRA_A32_SHIFT) | (r << SK_BGRA_R32_SHIFT) |
131            (g << SK_BGRA_G32_SHIFT) | (b << SK_BGRA_B32_SHIFT);
132 }
133 
SkSwizzle_RGBA_to_PMColor(uint32_t c)134 static inline SkPMColor SkSwizzle_RGBA_to_PMColor(uint32_t c) {
135 #ifdef SK_PMCOLOR_IS_RGBA
136     return c;
137 #else
138     return SkSwizzle_RB(c);
139 #endif
140 }
141 
SkSwizzle_BGRA_to_PMColor(uint32_t c)142 static inline SkPMColor SkSwizzle_BGRA_to_PMColor(uint32_t c) {
143 #ifdef SK_PMCOLOR_IS_BGRA
144     return c;
145 #else
146     return SkSwizzle_RB(c);
147 #endif
148 }
149 
150 //////////////////////////////////////////////////////////////////////////////
151 
///@{
/** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .
 *  The three float coefficients below add up to 1.0.
 */
#define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
#define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
#define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
///@}

///@{
/** A float value which specifies this channel's contribution to luminance.
 *  These are aliases for the BT.709 coefficients defined above.
 */
#define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
#define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
#define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
///@}
165 
166 /** Computes the luminance from the given r, g, and b in accordance with
167     SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
168 */
SkComputeLuminance(U8CPU r,U8CPU g,U8CPU b)169 static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
170     //The following is
171     //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
172     //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
173     return (r * 54 + g * 183 + b * 19) >> 8;
174 }
175 
176 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
177     byte into a scale value, so that we can say scale * value >> 8 instead of
178     alpha * value / 255.
179 
180     In debugging, asserts that alpha is 0..255
181 */
SkAlpha255To256(U8CPU alpha)182 static inline unsigned SkAlpha255To256(U8CPU alpha) {
183     SkASSERT(SkToU8(alpha) == alpha);
184     // this one assues that blending on top of an opaque dst keeps it that way
185     // even though it is less accurate than a+(a>>7) for non-opaque dsts
186     return alpha + 1;
187 }
188 
189 /**
190  *  Turn a 0..255 value into a 0..256 value, rounding up if the value is >= 0x80.
191  *  This is slightly more accurate than SkAlpha255To256.
192  */
Sk255To256(U8CPU value)193 static inline unsigned Sk255To256(U8CPU value) {
194     SkASSERT(SkToU8(value) == value);
195     return value + (value >> 7);
196 }
197 
/** Multiply value by 0..256, and shift the result down 8
    (i.e. return (value * alpha256) >> 8)
 */
#define SkAlphaMul(value, alpha256)     (((value) * (alpha256)) >> 8)
202 
203 /** Calculates 256 - (value * alpha256) / 255 in range [0,256],
204  *  for [0,255] value and [0,256] alpha256.
205  */
SkAlphaMulInv256(U16CPU value,U16CPU alpha256)206 static inline U16CPU SkAlphaMulInv256(U16CPU value, U16CPU alpha256) {
207     unsigned prod = 0xFFFF - value * alpha256;
208     return (prod + (prod >> 8)) >> 8;
209 }
210 
211 //  The caller may want negative values, so keep all params signed (int)
212 //  so we don't accidentally slip into unsigned math and lose the sign
213 //  extension when we shift (in SkAlphaMul)
SkAlphaBlend(int src,int dst,int scale256)214 static inline int SkAlphaBlend(int src, int dst, int scale256) {
215     SkASSERT((unsigned)scale256 <= 256);
216     return dst + SkAlphaMul(src - dst, scale256);
217 }
218 
219 /**
220  *  Returns (src * alpha + dst * (255 - alpha)) / 255
221  *
222  *  This is more accurate than SkAlphaBlend, but slightly slower
223  */
SkAlphaBlend255(S16CPU src,S16CPU dst,U8CPU alpha)224 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
225     SkASSERT((int16_t)src == src);
226     SkASSERT((int16_t)dst == dst);
227     SkASSERT((uint8_t)alpha == alpha);
228 
229     int prod = (src - dst) * alpha + 128;
230     prod = (prod + (prod >> 8)) >> 8;
231     return dst + prod;
232 }
233 
SkUnitScalarClampToByte(SkScalar x)234 static inline U8CPU SkUnitScalarClampToByte(SkScalar x) {
235     return static_cast<U8CPU>(SkScalarPin(x, 0, 1) * 255 + 0.5);
236 }
237 
// Bit widths of the three channels of a 16-bit (565) pixel.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Bit positions: blue is lowest, green sits above blue, red above both.
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

// Largest value each channel can hold (all channel bits set, unshifted).
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract one channel from a packed 16-bit color.
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Assert (via SkASSERT) that a channel value does not exceed its mask.
#define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
#define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
#define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
257 
SkPackRGB16(unsigned r,unsigned g,unsigned b)258 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
259     SkASSERT(r <= SK_R16_MASK);
260     SkASSERT(g <= SK_G16_MASK);
261     SkASSERT(b <= SK_B16_MASK);
262 
263     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
264 }
265 
// Channel masks shifted to the position the channel occupies in a packed 16-bit pixel.
#define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
#define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
#define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
269 
270 /** Expand the 16bit color into a 32bit value that can be scaled all at once
271     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
272 */
SkExpand_rgb_16(U16CPU c)273 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
274     SkASSERT(c == (uint16_t)c);
275 
276     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
277 }
278 
279 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
280     color value. The computation yields only 16bits of valid data, but we claim
281     to return 32bits, so that the compiler won't generate extra instructions to
282     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
283     up to the caller to safely ignore them.
284 */
SkCompact_rgb_16(uint32_t c)285 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
286     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
287 }
288 
289 /** Scale the 16bit color value by the 0..256 scale parameter.
290     The computation yields only 16bits of valid data, but we claim
291     to return 32bits, so that the compiler won't generate extra instructions to
292     "clean" the top 16bits.
293 */
SkAlphaMulRGB16(U16CPU c,unsigned scale)294 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
295     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
296 }
297 
// this helper explicitly returns a clean 16bit value (but slower):
// it casts the result of SkAlphaMulRGB16 down to uint16_t.
#define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
300 
301 /** Blend pre-expanded RGB32 with 16bit color value by the 0..32 scale parameter.
302     The computation yields only 16bits of valid data, but we claim to return
303     32bits, so that the compiler won't generate extra instructions to "clean"
304     the top 16bits.
305 */
SkBlend32_RGB16(uint32_t src_expand,uint16_t dst,unsigned scale)306 static inline U16CPU SkBlend32_RGB16(uint32_t src_expand, uint16_t dst, unsigned scale) {
307     uint32_t dst_expand = SkExpand_rgb_16(dst) * scale;
308     return SkCompact_rgb_16((src_expand + dst_expand) >> 5);
309 }
310 
311 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
312     The computation yields only 16bits of valid data, but we claim
313     to return 32bits, so that the compiler won't generate extra instructions to
314     "clean" the top 16bits.
315 */
SkBlendRGB16(U16CPU src,U16CPU dst,int srcScale)316 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
317     SkASSERT((unsigned)srcScale <= 256);
318 
319     srcScale >>= 3;
320 
321     uint32_t src32 = SkExpand_rgb_16(src);
322     uint32_t dst32 = SkExpand_rgb_16(dst);
323     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
324 }
325 
SkBlendRGB16(const uint16_t src[],uint16_t dst[],int srcScale,int count)326 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
327                                 int srcScale, int count) {
328     SkASSERT(count > 0);
329     SkASSERT((unsigned)srcScale <= 256);
330 
331     srcScale >>= 3;
332 
333     do {
334         uint32_t src32 = SkExpand_rgb_16(*src++);
335         uint32_t dst32 = SkExpand_rgb_16(*dst);
336         *dst++ = static_cast<uint16_t>(
337             SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5)));
338     } while (--count > 0);
339 }
340 
#ifdef SK_DEBUG
    // Adds two packed 16-bit colors. The debug build asserts that no channel
    // sum exceeds its mask (which would carry into the neighbouring channel).
    static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
        SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
        SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
        SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);

        return b + a;
    }
#else
    // Without SK_DEBUG this is a plain, unchecked addition.
    #define SkRGB16Add(a, b)  ((a) + (b))
#endif
352 
353 ///////////////////////////////////////////////////////////////////////////////
354 
// Bit width of each channel in a 32-bit packed color.
#define SK_A32_BITS     8
#define SK_R32_BITS     8
#define SK_G32_BITS     8
#define SK_B32_BITS     8

// Largest value each channel can hold (unshifted).
#define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
#define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
#define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
#define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)

// Extract one channel: shift it up to the top byte (discarding anything above
// it), then shift down by 24 so the result is 0..255.
#define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
#define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
#define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
#define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)

// Assert (via SkASSERT) that a channel value does not exceed its mask.
#define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
#define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
#define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
#define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)

// SkPMColorAssert(c): with SK_DEBUG, asserts that c is a valid premultiplied
// color, i.e. alpha is in range and no color channel exceeds alpha. The
// argument is evaluated once into a local. Without SK_DEBUG it expands to nothing.
#ifdef SK_DEBUG
    #define SkPMColorAssert(color_value)                                    \
        do {                                                                \
            SkPMColor pm_color_value = (color_value);                       \
            uint32_t alpha_color_value = SkGetPackedA32(pm_color_value);    \
            SkA32Assert(alpha_color_value);                                 \
            SkASSERT(SkGetPackedR32(pm_color_value) <= alpha_color_value);  \
            SkASSERT(SkGetPackedG32(pm_color_value) <= alpha_color_value);  \
            SkASSERT(SkGetPackedB32(pm_color_value) <= alpha_color_value);  \
        } while (false)
#else
    #define SkPMColorAssert(c)
#endif
388 
SkPMColorValid(SkPMColor c)389 static inline bool SkPMColorValid(SkPMColor c) {
390     auto a = SkGetPackedA32(c);
391     bool valid = a <= SK_A32_MASK
392               && SkGetPackedR32(c) <= a
393               && SkGetPackedG32(c) <= a
394               && SkGetPackedB32(c) <= a;
395     if (valid) {
396         SkPMColorAssert(c);  // Make sure we're consistent when it counts.
397     }
398     return valid;
399 }
400 
401 /**
402  *  Pack the components into a SkPMColor, checking (in the debug version) that
403  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
404  */
SkPackARGB32(U8CPU a,U8CPU r,U8CPU g,U8CPU b)405 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
406     SkA32Assert(a);
407     SkASSERT(r <= a);
408     SkASSERT(g <= a);
409     SkASSERT(b <= a);
410 
411     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
412            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
413 }
414 
SkPackPMColor_as_RGBA(SkPMColor c)415 static inline uint32_t SkPackPMColor_as_RGBA(SkPMColor c) {
416     return SkPackARGB_as_RGBA(SkGetPackedA32(c), SkGetPackedR32(c),
417                               SkGetPackedG32(c), SkGetPackedB32(c));
418 }
419 
SkPackPMColor_as_BGRA(SkPMColor c)420 static inline uint32_t SkPackPMColor_as_BGRA(SkPMColor c) {
421     return SkPackARGB_as_BGRA(SkGetPackedA32(c), SkGetPackedR32(c),
422                               SkGetPackedG32(c), SkGetPackedB32(c));
423 }
424 
425 /**
426  * Abstract 4-byte interpolation, implemented on top of SkPMColor
427  * utility functions. Third parameter controls blending of the first two:
428  *   (src, dst, 0) returns dst
429  *   (src, dst, 0xFF) returns src
430  *   srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255]
431  */
SkFourByteInterp256(SkPMColor src,SkPMColor dst,unsigned scale)432 static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
433                                          unsigned scale) {
434     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
435     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
436     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
437     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
438 
439     return SkPackARGB32(a, r, g, b);
440 }
441 
442 /**
443  * Abstract 4-byte interpolation, implemented on top of SkPMColor
444  * utility functions. Third parameter controls blending of the first two:
445  *   (src, dst, 0) returns dst
446  *   (src, dst, 0xFF) returns src
447  */
SkFourByteInterp(SkPMColor src,SkPMColor dst,U8CPU srcWeight)448 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
449                                          U8CPU srcWeight) {
450     unsigned scale = SkAlpha255To256(srcWeight);
451     return SkFourByteInterp256(src, dst, scale);
452 }
453 
/**
 * Splits a packed color into two words with a zero byte between channels:
 * 0xAARRGGBB -> *ag = 0x00AA00GG, *rb = 0x00RR00BB
 */
static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) {
    const uint32_t kLowBytes = 0x00FF00FF;
    const uint32_t shifted = color >> 8;
    *ag = shifted & kLowBytes;
    *rb = color & kLowBytes;
}
462 
/**
 * Splits a packed color into one 64-bit word with a zero byte between channels:
 * 0xAARRGGBB -> 0x00AA00GG00RR00BB
 * (note, ARGB -> AGRB)
 */
static inline uint64_t SkSplay(uint32_t color) {
    const uint32_t kLowBytes = 0x00FF00FF;
    const uint64_t ag = (color >> 8) & kLowBytes;  // 0x0000000000AA00GG
    const uint64_t rb = color & kLowBytes;         // 0x0000000000RR00BB
    return (ag << 32) | rb;                        // 0x00AA00GG00RR00BB
}
474 
/**
 * Recombines the high byte of each 16-bit lane from two words:
 * 0xAAxxGGxx, 0xRRxxBBxx -> 0xAARRGGBB
 */
static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) {
    const uint32_t kHighBytes = 0xFF00FF00;
    const uint32_t alphaGreen = ag & kHighBytes;         // 0xAA00GG00
    const uint32_t redBlue    = (rb & kHighBytes) >> 8;  // 0x00RR00BB
    return alphaGreen | redBlue;
}
482 
483 /**
484  * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
485  * (note, AGRB -> ARGB)
486  */
SkUnsplay(uint64_t agrb)487 static inline uint32_t SkUnsplay(uint64_t agrb) {
488     const uint32_t mask = 0xFF00FF00;
489     return SkPMColor(
490         ((agrb & mask) >> 8) |   // 0x00RR00BB
491         ((agrb >> 32) & mask));  // 0xAARRGGBB
492 }
493 
SkFastFourByteInterp256_32(SkPMColor src,SkPMColor dst,unsigned scale)494 static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
495     SkASSERT(scale <= 256);
496 
497     // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.
498     uint32_t src_ag, src_rb, dst_ag, dst_rb;
499     SkSplay(src, &src_ag, &src_rb);
500     SkSplay(dst, &dst_ag, &dst_rb);
501 
502     const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
503     const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
504 
505     return SkUnsplay(ret_ag, ret_rb);
506 }
507 
SkFastFourByteInterp256_64(SkPMColor src,SkPMColor dst,unsigned scale)508 static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
509     SkASSERT(scale <= 256);
510     // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.
511     return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));
512 }
513 
514 // TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere.
515 
516 /**
517  * Same as SkFourByteInterp256, but faster.
518  */
SkFastFourByteInterp256(SkPMColor src,SkPMColor dst,unsigned scale)519 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
520     // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.
521     if (sizeof(void*) == 4) {
522         return SkFastFourByteInterp256_32(src, dst, scale);
523     } else {
524         return SkFastFourByteInterp256_64(src, dst, scale);
525     }
526 }
527 
528 /**
529  * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
530  * srcWeight scaling to [0, 256].
531  */
SkFastFourByteInterp(SkPMColor src,SkPMColor dst,U8CPU srcWeight)532 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
533                                              SkPMColor dst,
534                                              U8CPU srcWeight) {
535     SkASSERT(srcWeight <= 255);
536     // scale = srcWeight + (srcWeight >> 7) is more accurate than
537     // scale = srcWeight + 1, but 7% slower
538     return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
539 }
540 
541 /**
542  *  Same as SkPackARGB32, but this version guarantees to not check that the
543  *  values are premultiplied in the debug version.
544  */
SkPackARGB32NoCheck(U8CPU a,U8CPU r,U8CPU g,U8CPU b)545 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
546     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
547            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
548 }
549 
550 static inline
SkPremultiplyARGBInline(U8CPU a,U8CPU r,U8CPU g,U8CPU b)551 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
552     SkA32Assert(a);
553     SkR32Assert(r);
554     SkG32Assert(g);
555     SkB32Assert(b);
556 
557     if (a != 255) {
558         r = SkMulDiv255Round(r, a);
559         g = SkMulDiv255Round(g, a);
560         b = SkMulDiv255Round(b, a);
561     }
562     return SkPackARGB32(a, r, g, b);
563 }
564 
565 // When Android is compiled optimizing for size, SkAlphaMulQ doesn't get
566 // inlined; forcing inlining significantly improves performance.
SkAlphaMulQ(uint32_t c,unsigned scale)567 static SK_ALWAYS_INLINE uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
568     uint32_t mask = 0xFF00FF;
569 
570     uint32_t rb = ((c & mask) * scale) >> 8;
571     uint32_t ag = ((c >> 8) & mask) * scale;
572     return (rb & mask) | (ag & ~mask);
573 }
574 
SkPMSrcOver(SkPMColor src,SkPMColor dst)575 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
576     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
577 }
578 
579 /**
580  * Interpolates between colors src and dst using [0,256] scale.
581  */
SkPMLerp(SkPMColor src,SkPMColor dst,unsigned scale)582 static inline SkPMColor SkPMLerp(SkPMColor src, SkPMColor dst, unsigned scale) {
583     return SkFastFourByteInterp256(src, dst, scale);
584 }
585 
SkBlendARGB32(SkPMColor src,SkPMColor dst,U8CPU aa)586 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
587     SkASSERT((unsigned)aa <= 255);
588 
589     unsigned src_scale = SkAlpha255To256(aa);
590     unsigned dst_scale = SkAlphaMulInv256(SkGetPackedA32(src), src_scale);
591 
592     const uint32_t mask = 0xFF00FF;
593 
594     uint32_t src_rb = (src & mask) * src_scale;
595     uint32_t src_ag = ((src >> 8) & mask) * src_scale;
596 
597     uint32_t dst_rb = (dst & mask) * dst_scale;
598     uint32_t dst_ag = ((dst >> 8) & mask) * dst_scale;
599 
600     return (((src_rb + dst_rb) >> 8) & mask) | ((src_ag + dst_ag) & ~mask);
601 }
602 
////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 32bit pixel to a 16bit pixel (no dither)

// Narrow an 8-bit channel to its 16-bit-pixel width by dropping the low bits
// (3 for red and blue, 2 for green). No range checking is done here.
#define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
#define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
#define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
609 
#ifdef SK_DEBUG
    // With SK_DEBUG these are real functions so the 8-bit input range can be
    // asserted before the value is narrowed by the matching *_MACRO.
    static inline unsigned SkR32ToR16(unsigned r) {
        SkR32Assert(r);
        const unsigned narrowed = SkR32ToR16_MACRO(r);
        return narrowed;
    }
    static inline unsigned SkG32ToG16(unsigned g) {
        SkG32Assert(g);
        const unsigned narrowed = SkG32ToG16_MACRO(g);
        return narrowed;
    }
    static inline unsigned SkB32ToB16(unsigned b) {
        SkB32Assert(b);
        const unsigned narrowed = SkB32ToB16_MACRO(b);
        return narrowed;
    }
#else
    // Without SK_DEBUG they are plain aliases for the unchecked macros.
    #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
#endif
628 
// Extract one channel of a packed 32-bit color directly at 16-bit-pixel
// precision: shift past the channel's discarded low bits, then mask.
#define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
#define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
#define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
632 
SkPixel32ToPixel16(SkPMColor c)633 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
634     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
635     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
636     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
637     return r | g | b;
638 }
639 
SkPack888ToRGB16(U8CPU r,U8CPU g,U8CPU b)640 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
641     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
642             (SkG32ToG16(g) << SK_G16_SHIFT) |
643             (SkB32ToB16(b) << SK_B16_SHIFT);
644 }
645 
// Same as SkPixel32ToPixel16, but passed through SkToU16 for a 16-bit result.
#define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))

/////////////////////////////////////////////////////////////////////////////////////////
// Fast dither from 32->16

// Non-zero when the low bits of x and y differ, i.e. on alternating pixels
// in a checkerboard pattern.
#define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
652 
SkDitherPack888ToRGB16(U8CPU r,U8CPU g,U8CPU b)653 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
654     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
655     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
656     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
657 
658     return SkPackRGB16(r, g, b);
659 }
660 
SkDitherPixel32ToPixel16(SkPMColor c)661 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
662     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
663 }
664 
665 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
666     It is now suitable for combining with a scaled expanded_rgb_16 color
667     as in SkSrcOver32To16().
668     We must do this 565 high-bit replication, in order for the subsequent add
669     to saturate properly (and not overflow). If we take the 8 bits as is, it is
670     possible to overflow.
671 */
SkPMColorToExpanded16x5(SkPMColor c)672 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
673     unsigned sr = SkPacked32ToR16(c);
674     unsigned sg = SkPacked32ToG16(c);
675     unsigned sb = SkPacked32ToB16(c);
676 
677     sr = (sr << 5) | sr;
678     sg = (sg << 5) | (sg >> 1);
679     sb = (sb << 5) | sb;
680     return (sr << 11) | (sg << 21) | (sb << 0);
681 }
682 
683 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
684     (with dirt in the high 16bits, so caller beware).
685 */
SkSrcOver32To16(SkPMColor src,uint16_t dst)686 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
687     unsigned sr = SkGetPackedR32(src);
688     unsigned sg = SkGetPackedG32(src);
689     unsigned sb = SkGetPackedB32(src);
690 
691     unsigned dr = SkGetPackedR16(dst);
692     unsigned dg = SkGetPackedG16(dst);
693     unsigned db = SkGetPackedB16(dst);
694 
695     unsigned isa = 255 - SkGetPackedA32(src);
696 
697     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
698     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
699     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
700 
701     return SkPackRGB16(dr, dg, db);
702 }
703 
704 ////////////////////////////////////////////////////////////////////////////////////////////
705 // Convert a 16bit pixel to a 32bit pixel
706 
SkR16ToR32(unsigned r)707 static inline unsigned SkR16ToR32(unsigned r) {
708     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
709 }
710 
SkG16ToG32(unsigned g)711 static inline unsigned SkG16ToG32(unsigned g) {
712     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
713 }
714 
SkB16ToB32(unsigned b)715 static inline unsigned SkB16ToB32(unsigned b) {
716     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
717 }
718 
// Extract one channel from a packed 16-bit color and widen it to 8 bits.
#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
722 
SkPixel16ToPixel32(U16CPU src)723 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
724     SkASSERT(src == SkToU16(src));
725 
726     unsigned    r = SkPacked16ToR32(src);
727     unsigned    g = SkPacked16ToG32(src);
728     unsigned    b = SkPacked16ToB32(src);
729 
730     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
731     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
732     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
733 
734     return SkPackARGB32(0xFF, r, g, b);
735 }
736 
737 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
SkPixel16ToColor(U16CPU src)738 static inline SkColor SkPixel16ToColor(U16CPU src) {
739     SkASSERT(src == SkToU16(src));
740 
741     unsigned    r = SkPacked16ToR32(src);
742     unsigned    g = SkPacked16ToG32(src);
743     unsigned    b = SkPacked16ToB32(src);
744 
745     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
746     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
747     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
748 
749     return SkColorSetRGB(r, g, b);
750 }
751 
752 ///////////////////////////////////////////////////////////////////////////////
753 
// 16bit packed color made of four 4bit components (a, r, g, b); the bit
// position of each component is given by the SK_?4444_SHIFT values below.
typedef uint16_t SkPMColor16;

// Put in OpenGL order (r g b a)
// i.e. r occupies the most significant nibble and a the least significant.
#define SK_A4444_SHIFT    0
#define SK_R4444_SHIFT    12
#define SK_G4444_SHIFT    8
#define SK_B4444_SHIFT    4

// Reduce a component to 4 bits by discarding its low 4 bits (for an 8bit
// input this keeps the high nibble).
#define SkA32To4444(a)  ((unsigned)(a) >> 4)
#define SkR32To4444(r)  ((unsigned)(r) >> 4)
#define SkG32To4444(g)  ((unsigned)(g) >> 4)
#define SkB32To4444(b)  ((unsigned)(b) >> 4)
766 
SkReplicateNibble(unsigned nib)767 static inline U8CPU SkReplicateNibble(unsigned nib) {
768     SkASSERT(nib <= 0xF);
769     return (nib << 4) | nib;
770 }
771 
// Widen a single 4bit component to 8 bits by nibble replication.
#define SkA4444ToA32(a)     SkReplicateNibble(a)
#define SkR4444ToR32(r)     SkReplicateNibble(r)
#define SkG4444ToG32(g)     SkReplicateNibble(g)
#define SkB4444ToB32(b)     SkReplicateNibble(b)

// Extract one 4bit component (result is 0..0xF) from a packed 4444 color.
#define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
#define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
#define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
#define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)

// Extract one component from a packed 4444 color and widen it to 8 bits.
#define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
#define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
#define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
#define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
786 
#ifdef SK_DEBUG
/** Debug-only sanity check for a packed 4444 color: asserts that none of the
    color components is larger than alpha. Expands to nothing when SK_DEBUG
    is not defined.
*/
static inline void SkPMColor16Assert(U16CPU c) {
    const unsigned alpha = SkGetPackedA4444(c);

    SkASSERT(alpha <= 0xF);
    SkASSERT(SkGetPackedR4444(c) <= alpha);
    SkASSERT(SkGetPackedG4444(c) <= alpha);
    SkASSERT(SkGetPackedB4444(c) <= alpha);
}
#else
#define SkPMColor16Assert(c)
#endif
802 
SkAlpha15To16(unsigned a)803 static inline unsigned SkAlpha15To16(unsigned a) {
804     SkASSERT(a <= 0xF);
805     return a + (a >> 3);
806 }
807 
#ifdef SK_DEBUG
    // Multiply value by scale and shift the product down by 4, i.e. scale
    // value by scale/16. The debug flavor is a real function so that the
    // range of scale (0..16) can be asserted.
    static inline int SkAlphaMul4(int value, int scale) {
        SkASSERT((unsigned)scale <= 0x10);
        const int product = value * scale;
        return product >> 4;
    }
#else
    #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
#endif
816 
SkR4444ToR565(unsigned r)817 static inline unsigned SkR4444ToR565(unsigned r) {
818     SkASSERT(r <= 0xF);
819     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
820 }
821 
SkG4444ToG565(unsigned g)822 static inline unsigned SkG4444ToG565(unsigned g) {
823     SkASSERT(g <= 0xF);
824     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
825 }
826 
SkB4444ToB565(unsigned b)827 static inline unsigned SkB4444ToB565(unsigned b) {
828     SkASSERT(b <= 0xF);
829     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
830 }
831 
SkPackARGB4444(unsigned a,unsigned r,unsigned g,unsigned b)832 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
833                                          unsigned g, unsigned b) {
834     SkASSERT(a <= 0xF);
835     SkASSERT(r <= a);
836     SkASSERT(g <= a);
837     SkASSERT(b <= a);
838 
839     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
840                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
841 }
842 
SkAlphaMulQ4(SkPMColor16 c,int scale)843 static inline SkPMColor16 SkAlphaMulQ4(SkPMColor16 c, int scale) {
844     SkASSERT(scale <= 16);
845 
846     const unsigned mask = 0xF0F;    //gMask_0F0F;
847 
848 #if 0
849     unsigned rb = ((c & mask) * scale) >> 4;
850     unsigned ag = ((c >> 4) & mask) * scale;
851     return (rb & mask) | (ag & ~mask);
852 #else
853     unsigned expanded_c = (c & mask) | ((c & (mask << 4)) << 12);
854     unsigned scaled_c = (expanded_c * scale) >> 4;
855     return (scaled_c & mask) | ((scaled_c >> 12) & (mask << 4));
856 #endif
857 }
858 
859 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
860     once by a value up to 16.
861 */
SkExpand_4444(U16CPU c)862 static inline uint32_t SkExpand_4444(U16CPU c) {
863     SkASSERT(c == (uint16_t)c);
864 
865     const unsigned mask = 0xF0F;    //gMask_0F0F;
866     return (c & mask) | ((c & ~mask) << 12);
867 }
868 
SkSrcOver4444To16(SkPMColor16 s,uint16_t d)869 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
870     unsigned sa = SkGetPackedA4444(s);
871     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
872     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
873     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
874 
875     // To avoid overflow, we have to clear the low bit of the synthetic sg
876     // if the src alpha is <= 7.
877     // to see why, try blending 0x4444 on top of 565-white and watch green
878     // overflow (sum == 64)
879     sg &= ~(~(sa >> 3) & 1);
880 
881     unsigned scale = SkAlpha15To16(15 - sa);
882     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
883     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
884     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
885 
886 #if 0
887     if (sg + dg > 63) {
888         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
889     }
890 #endif
891     return SkPackRGB16(sr + dr, sg + dg, sb + db);
892 }
893 
SkBlend4444To16(SkPMColor16 src,uint16_t dst,int scale16)894 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
895     SkASSERT((unsigned)scale16 <= 16);
896 
897     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
898 }
899 
SkPixel4444ToPixel32(U16CPU c)900 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
901     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
902                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
903                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
904                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
905     return d | (d << 4);
906 }
907 
SkPixel32ToPixel4444(SkPMColor c)908 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
909     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
910     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
911     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
912     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
913 }
914 
915 // cheap 2x2 dither
SkDitherARGB32To4444(U8CPU a,U8CPU r,U8CPU g,U8CPU b)916 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
917                                                U8CPU g, U8CPU b) {
918     // to ensure that we stay a legal premultiplied color, we take the max()
919     // of the truncated and dithered alpha values. If we didn't, cases like
920     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
921     // which is not legal premultiplied, since a < color
922     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
923     a = SkMax32(a >> 4, dithered_a);
924     // these we just dither in place
925     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
926     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
927     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
928 
929     return SkPackARGB4444(a, r, g, b);
930 }
931 
SkDitherPixel32To4444(SkPMColor c)932 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
933     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
934                                 SkGetPackedG32(c), SkGetPackedB32(c));
935 }
936 
937 /*  Assumes 16bit is in standard RGBA order.
938     Transforms a normal ARGB_8888 into the same byte order as
939     expanded ARGB_4444, but keeps each component 8bits
940 */
SkExpand_8888(SkPMColor c)941 static inline uint32_t SkExpand_8888(SkPMColor c) {
942     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
943             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
944             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
945             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
946 }
947 
948 /*  Undo the operation of SkExpand_8888, turning the argument back into
949     a SkPMColor.
950 */
SkCompact_8888(uint32_t c)951 static inline SkPMColor SkCompact_8888(uint32_t c) {
952     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
953             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
954             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
955             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
956 }
957 
958 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
959     but this routine just keeps the high 4bits of each component in the low
960     4bits of the result (just like a newly expanded PMColor16).
961 */
SkExpand32_4444(SkPMColor c)962 static inline uint32_t SkExpand32_4444(SkPMColor c) {
963     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
964             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
965             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
966             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
967 }
968 
// Takes two values and alternates them as part of a memset16.
// Used for cheap 2x2 dithering when the colors are opaque.
// NOTE(review): n is presumably the number of uint16_t entries written to
// dst, and which of value/other lands in dst[0] is decided by the
// definition, which is not visible here -- confirm before relying on it.
void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
972 
973 ///////////////////////////////////////////////////////////////////////////////
974 
SkUpscale31To32(int value)975 static inline int SkUpscale31To32(int value) {
976     SkASSERT((unsigned)value <= 31);
977     return value + (value >> 4);
978 }
979 
SkBlend32(int src,int dst,int scale)980 static inline int SkBlend32(int src, int dst, int scale) {
981     SkASSERT((unsigned)src <= 0xFF);
982     SkASSERT((unsigned)dst <= 0xFF);
983     SkASSERT((unsigned)scale <= 32);
984     return dst + ((src - dst) * scale >> 5);
985 }
986 
SkBlendLCD16(int srcA,int srcR,int srcG,int srcB,SkPMColor dst,uint16_t mask)987 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
988                                      SkPMColor dst, uint16_t mask) {
989     if (mask == 0) {
990         return dst;
991     }
992 
993     /*  We want all of these in 5bits, hence the shifts in case one of them
994      *  (green) is 6bits.
995      */
996     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
997     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
998     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
999 
1000     // Now upscale them to 0..32, so we can use blend32
1001     maskR = SkUpscale31To32(maskR);
1002     maskG = SkUpscale31To32(maskG);
1003     maskB = SkUpscale31To32(maskB);
1004 
1005     // srcA has been upscaled to 256 before passed into this function
1006     maskR = maskR * srcA >> 8;
1007     maskG = maskG * srcA >> 8;
1008     maskB = maskB * srcA >> 8;
1009 
1010     int dstR = SkGetPackedR32(dst);
1011     int dstG = SkGetPackedG32(dst);
1012     int dstB = SkGetPackedB32(dst);
1013 
1014     // LCD blitting is only supported if the dst is known/required
1015     // to be opaque
1016     return SkPackARGB32(0xFF,
1017                         SkBlend32(srcR, dstR, maskR),
1018                         SkBlend32(srcG, dstG, maskG),
1019                         SkBlend32(srcB, dstB, maskB));
1020 }
1021 
SkBlendLCD16Opaque(int srcR,int srcG,int srcB,SkPMColor dst,uint16_t mask,SkPMColor opaqueDst)1022 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
1023                                            SkPMColor dst, uint16_t mask,
1024                                            SkPMColor opaqueDst) {
1025     if (mask == 0) {
1026         return dst;
1027     }
1028 
1029     if (0xFFFF == mask) {
1030         return opaqueDst;
1031     }
1032 
1033     /*  We want all of these in 5bits, hence the shifts in case one of them
1034      *  (green) is 6bits.
1035      */
1036     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
1037     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
1038     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
1039 
1040     // Now upscale them to 0..32, so we can use blend32
1041     maskR = SkUpscale31To32(maskR);
1042     maskG = SkUpscale31To32(maskG);
1043     maskB = SkUpscale31To32(maskB);
1044 
1045     int dstR = SkGetPackedR32(dst);
1046     int dstG = SkGetPackedG32(dst);
1047     int dstB = SkGetPackedB32(dst);
1048 
1049     // LCD blitting is only supported if the dst is known/required
1050     // to be opaque
1051     return SkPackARGB32(0xFF,
1052                         SkBlend32(srcR, dstR, maskR),
1053                         SkBlend32(srcG, dstG, maskG),
1054                         SkBlend32(srcB, dstB, maskB));
1055 }
1056 
SkBlitLCD16Row(SkPMColor dst[],const uint16_t mask[],SkColor src,int width,SkPMColor)1057 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[],
1058                                   SkColor src, int width, SkPMColor) {
1059     int srcA = SkColorGetA(src);
1060     int srcR = SkColorGetR(src);
1061     int srcG = SkColorGetG(src);
1062     int srcB = SkColorGetB(src);
1063 
1064     srcA = SkAlpha255To256(srcA);
1065 
1066     for (int i = 0; i < width; i++) {
1067         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
1068     }
1069 }
1070 
SkBlitLCD16OpaqueRow(SkPMColor dst[],const uint16_t mask[],SkColor src,int width,SkPMColor opaqueDst)1071 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[],
1072                                         SkColor src, int width,
1073                                         SkPMColor opaqueDst) {
1074     int srcR = SkColorGetR(src);
1075     int srcG = SkColorGetG(src);
1076     int srcB = SkColorGetB(src);
1077 
1078     for (int i = 0; i < width; i++) {
1079         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
1080                                     opaqueDst);
1081     }
1082 }
1083 
1084 #endif
1085