1 #ifndef SkColor_opts_neon_DEFINED
2 #define SkColor_opts_neon_DEFINED
3 
4 #include "SkTypes.h"
5 #include "SkColorPriv.h"
6 
7 #include <arm_neon.h>
8 
/* Index of each colour channel inside a uint8x8x4_t, i.e. which val[] entry
 * holds that channel. Each index is the channel's SK_x32_SHIFT divided by 8.
 * NOTE(review): presumably SK_x32_SHIFT is the channel's bit offset inside a
 * packed 32-bit pixel, so the quotient is its byte position - confirm in
 * SkColorPriv.h.
 */
#define NEON_A (SK_A32_SHIFT / 8)
#define NEON_R (SK_R32_SHIFT / 8)
#define NEON_G (SK_G32_SHIFT / 8)
#define NEON_B (SK_B32_SHIFT / 8)
13 
/* Widens eight 8-bit alpha values to 16 bits and adds one to each lane,
 * so an input of 0..255 becomes 1..256.
 *
 * alpha: eight 8-bit alpha values.
 *
 * Returns the eight incremented values as 16-bit lanes.
 */
static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) {
    const uint16x8_t ones = vdupq_n_u16(1);
    const uint16x8_t alpha256 = vaddw_u8(ones, alpha);  /* widening add */

    return alpha256;
}
17 
/* Multiplies eight 8-bit values by eight 16-bit scale factors and keeps
 * bits 8..15 of each 16-bit product, i.e. (color * scale) >> 8 per lane.
 *
 * color: eight 8-bit channel values.
 * scale: eight 16-bit multipliers, one per lane.
 *
 * Returns the eight scaled values narrowed back to 8 bits.
 */
static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) {
    const uint16x8_t widened = vmovl_u8(color);
    const uint16x8_t product = vmulq_u16(widened, scale);

    /* Narrowing shift: drops the low byte of every product. */
    return vshrn_n_u16(product, 8);
}
21 
/* Applies SkAlphaMul_neon8 to the alpha, red, green and blue vectors of
 * eight pixels, using the same scale vector for every channel.
 *
 * color: four channel vectors, addressed through NEON_A/R/G/B.
 * scale: eight 16-bit multipliers; lane i is used for lane i of each channel.
 *
 * Returns the four scaled channel vectors, stored at the same indices.
 */
static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) {
    const uint8x8_t alpha = SkAlphaMul_neon8(color.val[NEON_A], scale);
    const uint8x8_t red   = SkAlphaMul_neon8(color.val[NEON_R], scale);
    const uint8x8_t green = SkAlphaMul_neon8(color.val[NEON_G], scale);
    const uint8x8_t blue  = SkAlphaMul_neon8(color.val[NEON_B], scale);
    uint8x8x4_t scaled;

    scaled.val[NEON_A] = alpha;
    scaled.val[NEON_R] = red;
    scaled.val[NEON_G] = green;
    scaled.val[NEON_B] = blue;

    return scaled;
}
32 
/* Expands eight RGB565 pixels (R in the high bits, then G, then B) into
 * four 8-bit channel vectors laid out for SkPMColor, whatever the configured
 * channel order. Intended to give the same result as the scalar
 * SkPixel16ToPixel32.
 *
 * vsrc: eight 16-bit source pixels.
 *
 * Returns the channels at val[NEON_A/R/G/B]; alpha is set to 0xFF in every
 * lane.
 */
static inline uint8x8x4_t SkPixel16ToPixel32_neon8(uint16x8_t vsrc) {
    /* Bring each colour field down to the low bits of an 8-bit lane. */
    const uint8x8_t red = vmovn_u16(vshrq_n_u16(vsrc, SK_R16_SHIFT));

    /* Shift left first so the red bits fall off the top, then shift right
     * far enough to discard the blue bits as well. */
    const uint16x8_t red_dropped = vshlq_n_u16(vsrc, SK_R16_BITS);
    const uint8x8_t green =
        vmovn_u16(vshrq_n_u16(red_dropped, SK_R16_BITS + SK_B16_BITS));

    const uint8x8_t blue = vmovn_u16(vandq_u16(vsrc, vdupq_n_u16(SK_B16_MASK)));

    uint8x8x4_t pixels;

    pixels.val[NEON_A] = vdup_n_u8(0xFF);

    /* For each field: left-align it in the byte, then fill the vacated low
     * bits with a copy of the field's own top bits. */
    pixels.val[NEON_R] = vorr_u8(vshl_n_u8(red, 8 - SK_R16_BITS),
                                 vshr_n_u8(red, 2 * SK_R16_BITS - 8));
    pixels.val[NEON_G] = vorr_u8(vshl_n_u8(green, 8 - SK_G16_BITS),
                                 vshr_n_u8(green, 2 * SK_G16_BITS - 8));
    pixels.val[NEON_B] = vorr_u8(vshl_n_u8(blue, 8 - SK_B16_BITS),
                                 vshr_n_u8(blue, 2 * SK_B16_BITS - 8));

    return pixels;
}
53 
/* Packs eight SkPMColor pixels, given as four channel vectors in whatever
 * channel order is configured, into eight RGB565 values (R in the high bits,
 * then G, then B). Intended to give the same result as the scalar
 * SkPixel32ToPixel16. The alpha channel is not read.
 *
 * vsrc: channel vectors addressed through NEON_R/G/B.
 *
 * Returns eight packed 16-bit pixels.
 */
static inline uint16x8_t SkPixel32ToPixel16_neon8(uint8x8x4_t vsrc) {
    /* Place every channel in the top byte of its 16-bit lane. */
    const uint16x8_t red_hi   = vshll_n_u8(vsrc.val[NEON_R], 8);
    const uint16x8_t green_hi = vshll_n_u8(vsrc.val[NEON_G], 8);
    const uint16x8_t blue_hi  = vshll_n_u8(vsrc.val[NEON_B], 8);

    /* Shift-right-and-insert keeps the top n bits already in the destination
     * and writes the shifted source below them: first green goes under the
     * red bits, then blue goes under the red and green bits. */
    const uint16x8_t red_green = vsriq_n_u16(red_hi, green_hi, SK_R16_BITS);

    return vsriq_n_u16(red_green, blue_hi, SK_R16_BITS + SK_G16_BITS);
}
68 
/* Blends eight values of one channel: per lane the result is
 * dst + (((src - dst) * scale) >> 5), computed in signed 16-bit arithmetic
 * and truncated to 8 bits. Intended to give the same result as the scalar
 * SkBlend32.
 *
 * src:   eight 8-bit source values.
 * dst:   eight 8-bit destination values.
 * scale: eight 16-bit blend factors, one per lane.
 *        NOTE(review): presumably limited to 0..32 as for SkBlend32 - confirm.
 *
 * Returns the eight blended 8-bit values.
 */
static inline uint8x8_t SkBlend32_neon8(uint8x8_t src, uint8x8_t dst, uint16x8_t scale) {
    /* Work on signed 16-bit lanes so that src - dst may be negative. */
    const int16x8_t src16   = vreinterpretq_s16_u16(vmovl_u8(src));
    const int16x8_t dst16   = vreinterpretq_s16_u16(vmovl_u8(dst));
    const int16x8_t scale16 = vreinterpretq_s16_u16(scale);

    const int16x8_t weighted = vmulq_s16(vsubq_s16(src16, dst16), scale16);

    /* Arithmetic shift, so a negative difference stays negative. */
    const int16x8_t blended = vaddq_s16(dst16, vshrq_n_s16(weighted, 5));

    return vmovn_u16(vreinterpretq_u16_s16(blended));
}
84 
/* Interpolates between two packed 32-bit colours, one byte at a time:
 * each byte of the result is dst + (((src - dst) * srcScale) >> 8),
 * truncated to 8 bits.
 *
 * src:      colour taken when srcScale is 256.
 * dst:      colour taken when srcScale is 0.
 * srcScale: weight of src; asserted to be at most 256.
 *
 * Returns the interpolated colour.
 */
static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst,
                                                 unsigned srcScale) {
    SkASSERT(srcScale <= 256);

    const int16x8_t weight = vdupq_n_s16(srcScale);

    /* Duplicate each colour into a 64-bit register and widen its bytes to
     * signed 16-bit lanes; lanes 0..3 hold the four bytes of the colour. */
    const int16x8_t src16 =
        vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src))));
    const int16x8_t dst16 =
        vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst))));

    /* The 16-bit product can wrap, but only bits 8..15 of it reach the low
     * byte of the sum, and only that low byte is kept after narrowing. */
    const int16x8_t weighted = vmulq_s16(vsubq_s16(src16, dst16), weight);
    const int16x8_t blended  = vaddq_s16(dst16, vshrq_n_s16(weighted, 8));

    const uint8x8_t bytes = vmovn_u16(vreinterpretq_u16_s16(blended));

    /* The first 32-bit lane carries the four interpolated bytes. */
    return vget_lane_u32(vreinterpret_u32_u8(bytes), 0);
}
106 
/* Interpolates between two packed colours using an 8-bit weight. The weight
 * is converted with SkAlpha255To256 and the work is delegated to
 * SkFourByteInterp256_neon.
 *
 * src:       colour weighted by srcWeight.
 * dst:       colour weighted by the remainder.
 * srcWeight: weight of src; asserted to be at most 255.
 *
 * Returns the interpolated colour.
 */
static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst,
                                              U8CPU srcWeight) {
    SkASSERT(srcWeight <= 255);

    return SkFourByteInterp256_neon(src, dst, SkAlpha255To256(srcWeight));
}
113 
114 #endif /* #ifndef SkColor_opts_neon_DEFINED */
115