1
2 #include "SkBlitMask.h"
3 #include "SkColor_opts_neon.h"
4
D32_A8_Black_neon(void * SK_RESTRICT dst,size_t dstRB,const void * SK_RESTRICT maskPtr,size_t maskRB,SkColor,int width,int height)5 static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB,
6 const void* SK_RESTRICT maskPtr, size_t maskRB,
7 SkColor, int width, int height) {
8 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
9 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
10
11 maskRB -= width;
12 dstRB -= (width << 2);
13 do {
14 int w = width;
15 while (w >= 8) {
16 uint8x8_t vmask = vld1_u8(mask);
17 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask);
18 uint8x8x4_t vdevice = vld4_u8((uint8_t*)device);
19
20 vdevice = SkAlphaMulQ_neon8(vdevice, vscale);
21 vdevice.val[NEON_A] += vmask;
22
23 vst4_u8((uint8_t*)device, vdevice);
24
25 mask += 8;
26 device += 8;
27 w -= 8;
28 }
29 while (w-- > 0) {
30 unsigned aa = *mask++;
31 *device = (aa << SK_A32_SHIFT)
32 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
33 device += 1;
34 };
35 device = (uint32_t*)((char*)device + dstRB);
36 mask += maskRB;
37 } while (--height != 0);
38 }
39
40 template <bool isColor>
D32_A8_Opaque_Color_neon(void * SK_RESTRICT dst,size_t dstRB,const void * SK_RESTRICT maskPtr,size_t maskRB,SkColor color,int width,int height)41 static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
42 const void* SK_RESTRICT maskPtr, size_t maskRB,
43 SkColor color, int width, int height) {
44 SkPMColor pmc = SkPreMultiplyColor(color);
45 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
46 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
47 uint8x8x4_t vpmc;
48
49 maskRB -= width;
50 dstRB -= (width << 2);
51
52 if (width >= 8) {
53 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc));
54 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc));
55 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc));
56 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc));
57 }
58 do {
59 int w = width;
60 while (w >= 8) {
61 uint8x8_t vmask = vld1_u8(mask);
62 uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask);
63 if (isColor) {
64 vscale = vsubw_u8(vdupq_n_u16(256),
65 SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256));
66 } else {
67 vscale = vsubw_u8(vdupq_n_u16(256), vmask);
68 }
69 uint8x8x4_t vdev = vld4_u8((uint8_t*)device);
70
71 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)
72 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale);
73 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256)
74 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale);
75 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256)
76 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale);
77 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256)
78 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale);
79
80 vst4_u8((uint8_t*)device, vdev);
81
82 mask += 8;
83 device += 8;
84 w -= 8;
85 }
86
87 while (w--) {
88 unsigned aa = *mask++;
89 if (isColor) {
90 *device = SkBlendARGB32(pmc, *device, aa);
91 } else {
92 *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa))
93 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
94 }
95 device += 1;
96 };
97
98 device = (uint32_t*)((char*)device + dstRB);
99 mask += maskRB;
100
101 } while (--height != 0);
102 }
103
D32_A8_Opaque_neon(void * SK_RESTRICT dst,size_t dstRB,const void * SK_RESTRICT maskPtr,size_t maskRB,SkColor color,int width,int height)104 static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB,
105 const void* SK_RESTRICT maskPtr, size_t maskRB,
106 SkColor color, int width, int height) {
107 D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, height);
108 }
109
D32_A8_Color_neon(void * SK_RESTRICT dst,size_t dstRB,const void * SK_RESTRICT maskPtr,size_t maskRB,SkColor color,int width,int height)110 static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
111 const void* SK_RESTRICT maskPtr, size_t maskRB,
112 SkColor color, int width, int height) {
113 D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, height);
114 }
115
D32_A8_Factory_neon(SkColor color)116 SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) {
117 if (SK_ColorBLACK == color) {
118 return D32_A8_Black_neon;
119 } else if (0xFF == SkColorGetA(color)) {
120 return D32_A8_Opaque_neon;
121 } else {
122 return D32_A8_Color_neon;
123 }
124 }
125
126 ////////////////////////////////////////////////////////////////////////////////
127
SkBlitLCD16OpaqueRow_neon(SkPMColor dst[],const uint16_t src[],SkColor color,int width,SkPMColor opaqueDst)128 void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[],
129 SkColor color, int width,
130 SkPMColor opaqueDst) {
131 int colR = SkColorGetR(color);
132 int colG = SkColorGetG(color);
133 int colB = SkColorGetB(color);
134
135 uint8x8_t vcolR, vcolG, vcolB;
136 uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB;
137
138 if (width >= 8) {
139 vcolR = vdup_n_u8(colR);
140 vcolG = vdup_n_u8(colG);
141 vcolB = vdup_n_u8(colB);
142 vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
143 vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
144 vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
145 vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));
146 }
147
148 while (width >= 8) {
149 uint8x8x4_t vdst;
150 uint16x8_t vmask;
151 uint16x8_t vmaskR, vmaskG, vmaskB;
152 uint8x8_t vsel_trans, vsel_opq;
153
154 vdst = vld4_u8((uint8_t*)dst);
155 vmask = vld1q_u16(src);
156
157 // Prepare compare masks
158 vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
159 vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));
160
161 // Get all the color masks on 5 bits
162 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
163 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
164 SK_B16_BITS + SK_R16_BITS + 1);
165 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
166
167 // Upscale to 0..32
168 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
169 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
170 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
171
172 vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF));
173 vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);
174
175 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
176 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
177 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
178
179 vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
180 vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
181 vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);
182
183 vst4_u8((uint8_t*)dst, vdst);
184
185 dst += 8;
186 src += 8;
187 width -= 8;
188 }
189
190 // Leftovers
191 for (int i = 0; i < width; i++) {
192 dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i],
193 opaqueDst);
194 }
195 }
196
SkBlitLCD16Row_neon(SkPMColor dst[],const uint16_t src[],SkColor color,int width,SkPMColor)197 void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[],
198 SkColor color, int width, SkPMColor) {
199 int colA = SkColorGetA(color);
200 int colR = SkColorGetR(color);
201 int colG = SkColorGetG(color);
202 int colB = SkColorGetB(color);
203
204 colA = SkAlpha255To256(colA);
205
206 uint8x8_t vcolR, vcolG, vcolB;
207 uint16x8_t vcolA;
208
209 if (width >= 8) {
210 vcolA = vdupq_n_u16(colA);
211 vcolR = vdup_n_u8(colR);
212 vcolG = vdup_n_u8(colG);
213 vcolB = vdup_n_u8(colB);
214 }
215
216 while (width >= 8) {
217 uint8x8x4_t vdst;
218 uint16x8_t vmask;
219 uint16x8_t vmaskR, vmaskG, vmaskB;
220
221 vdst = vld4_u8((uint8_t*)dst);
222 vmask = vld1q_u16(src);
223
224 // Get all the color masks on 5 bits
225 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
226 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
227 SK_B16_BITS + SK_R16_BITS + 1);
228 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
229
230 // Upscale to 0..32
231 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
232 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
233 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
234
235 vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
236 vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
237 vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);
238
239 vdst.val[NEON_A] = vdup_n_u8(0xFF);
240 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
241 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
242 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
243
244 vst4_u8((uint8_t*)dst, vdst);
245
246 dst += 8;
247 src += 8;
248 width -= 8;
249 }
250
251 for (int i = 0; i < width; i++) {
252 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]);
253 }
254 }
255