1 /*
2 * Copyright 2006 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "Sk4px.h"
9 #include "SkColorData.h"
10 #include "SkCoreBlitters.h"
11 #include "SkShader.h"
12 #include "SkUtils.h"
13 #include "SkXfermodePriv.h"
14
upscale_31_to_32(int value)15 static inline int upscale_31_to_32(int value) {
16 SkASSERT((unsigned)value <= 31);
17 return value + (value >> 4);
18 }
19
blend_32(int src,int dst,int scale)20 static inline int blend_32(int src, int dst, int scale) {
21 SkASSERT((unsigned)src <= 0xFF);
22 SkASSERT((unsigned)dst <= 0xFF);
23 SkASSERT((unsigned)scale <= 32);
24 return dst + ((src - dst) * scale >> 5);
25 }
26
blend_lcd16(int srcA,int srcR,int srcG,int srcB,SkPMColor dst,uint16_t mask)27 static inline SkPMColor blend_lcd16(int srcA, int srcR, int srcG, int srcB,
28 SkPMColor dst, uint16_t mask) {
29 if (mask == 0) {
30 return dst;
31 }
32
33 /* We want all of these in 5bits, hence the shifts in case one of them
34 * (green) is 6bits.
35 */
36 int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
37 int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
38 int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
39
40 // Now upscale them to 0..32, so we can use blend32
41 maskR = upscale_31_to_32(maskR);
42 maskG = upscale_31_to_32(maskG);
43 maskB = upscale_31_to_32(maskB);
44
45 // srcA has been upscaled to 256 before passed into this function
46 maskR = maskR * srcA >> 8;
47 maskG = maskG * srcA >> 8;
48 maskB = maskB * srcA >> 8;
49
50 int dstR = SkGetPackedR32(dst);
51 int dstG = SkGetPackedG32(dst);
52 int dstB = SkGetPackedB32(dst);
53
54 // LCD blitting is only supported if the dst is known/required
55 // to be opaque
56 return SkPackARGB32(0xFF,
57 blend_32(srcR, dstR, maskR),
58 blend_32(srcG, dstG, maskG),
59 blend_32(srcB, dstB, maskB));
60 }
61
blend_lcd16_opaque(int srcR,int srcG,int srcB,SkPMColor dst,uint16_t mask,SkPMColor opaqueDst)62 static inline SkPMColor blend_lcd16_opaque(int srcR, int srcG, int srcB,
63 SkPMColor dst, uint16_t mask,
64 SkPMColor opaqueDst) {
65 if (mask == 0) {
66 return dst;
67 }
68
69 if (0xFFFF == mask) {
70 return opaqueDst;
71 }
72
73 /* We want all of these in 5bits, hence the shifts in case one of them
74 * (green) is 6bits.
75 */
76 int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
77 int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
78 int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
79
80 // Now upscale them to 0..32, so we can use blend32
81 maskR = upscale_31_to_32(maskR);
82 maskG = upscale_31_to_32(maskG);
83 maskB = upscale_31_to_32(maskB);
84
85 int dstR = SkGetPackedR32(dst);
86 int dstG = SkGetPackedG32(dst);
87 int dstB = SkGetPackedB32(dst);
88
89 // LCD blitting is only supported if the dst is known/required
90 // to be opaque
91 return SkPackARGB32(0xFF,
92 blend_32(srcR, dstR, maskR),
93 blend_32(srcG, dstG, maskG),
94 blend_32(srcB, dstB, maskB));
95 }
96
97
98 // TODO: rewrite at least the SSE code here. It's miserable.
99
100 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
101 #include <emmintrin.h>
102
// Each SK_x16x5_x32x5_SHIFT below is the signed bit distance that moves the
// top 5 bits of one channel of a packed 16-bit mask onto the low 5 bits of the
// same channel's byte in an SkPMColor. A positive value means "shift left", a
// negative value means "shift right", and zero means the bits already line up.
// Note that the mask's RGB16 order may differ from the SkPMColor order.
#define SK_R16x5_R32x5_SHIFT (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5)
#define SK_G16x5_G32x5_SHIFT (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5)
#define SK_B16x5_B32x5_SHIFT (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5)

// The SkPacked?16x5ToUnmasked?32x5_SSE2(x) helpers apply the shift above to
// every 32-bit lane of x, picking the shift direction at preprocessing time.
// "Unmasked" means neighbouring bits are not cleared: the caller still has to
// AND the result with (0x1F << SK_?32_SHIFT).
#if SK_R16x5_R32x5_SHIFT == 0
    #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (x)
#elif SK_R16x5_R32x5_SHIFT > 0
    #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_slli_epi32(x, SK_R16x5_R32x5_SHIFT))
#else
    #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_srli_epi32(x, -SK_R16x5_R32x5_SHIFT))
#endif

#if SK_G16x5_G32x5_SHIFT == 0
    #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (x)
#elif SK_G16x5_G32x5_SHIFT > 0
    #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_slli_epi32(x, SK_G16x5_G32x5_SHIFT))
#else
    #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_srli_epi32(x, -SK_G16x5_G32x5_SHIFT))
#endif

#if SK_B16x5_B32x5_SHIFT == 0
    #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (x)
#elif SK_B16x5_B32x5_SHIFT > 0
    #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_slli_epi32(x, SK_B16x5_B32x5_SHIFT))
#else
    #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_srli_epi32(x, -SK_B16x5_B32x5_SHIFT))
#endif
133
// Blends four destination pixels towards src in one go, using four LCD16
// coverage masks modulated by the source alpha. Per channel this computes
// dst + ((src - dst) * weight >> 5), where weight is the 5-bit coverage
// widened to 0..32 and then scaled by srcA / 256.
//   src   source color as 16-bit lanes, two pixels wide (layout below)
//   dst   four 32-bit destination pixels
//   mask  four 16-bit masks, one in the low half of each 32-bit lane;
//         NOTE: passed by reference and overwritten as scratch space
//   srcA  source alpha (0..256) replicated into every 16-bit lane
// Returns the four blended pixels. The alpha lane of the expanded mask is
// always zero, so the alpha byte of each dst pixel passes through unchanged
// (the scalar blend_lcd16 forces alpha to 0xFF instead).
static __m128i blend_lcd16_sse2(__m128i &src, __m128i &dst, __m128i &mask, __m128i &srcA) {
    // In the following comments, the components of src, dst and mask are
    // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
    // by an R, G, B, or A suffix. Components of one of the four pixels that
    // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
    // example is the blue channel of the second destination pixel. Memory
    // layout is shown for an ARGB byte order in a color value.

    // src and srcA store 8-bit values interleaved with zeros.
    // src  = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
    // srcA = (srcA, 0, srcA, 0, srcA, 0, srcA, 0,
    //         srcA, 0, srcA, 0, srcA, 0, srcA, 0)
    // mask stores 16-bit values (compressed three channels) interleaved with zeros.
    // Lo and Hi denote the low and high bytes of a 16-bit value, respectively.
    // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
    //         m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)

    // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
    // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
    __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
                              _mm_set1_epi32(0x1F << SK_R32_SHIFT));

    // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
    __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
                              _mm_set1_epi32(0x1F << SK_G32_SHIFT));

    // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
    __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
                              _mm_set1_epi32(0x1F << SK_B32_SHIFT));

    // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
    // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
    // 8-bit position
    // mask = (0, m0R, m0G, m0B, 0, m1R, m1G, m1B,
    //         0, m2R, m2G, m2B, 0, m3R, m3G, m3B)
    mask = _mm_or_si128(_mm_or_si128(r, g), b);

    // Interleave R,G,B into the lower byte of word.
    // i.e. split the sixteen 8-bit values from mask into two sets of eight
    // 16-bit values, padded by zero.
    __m128i maskLo, maskHi;
    // maskLo = (0, 0, m0R, 0, m0G, 0, m0B, 0, 0, 0, m1R, 0, m1G, 0, m1B, 0)
    maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
    // maskHi = (0, 0, m2R, 0, m2G, 0, m2B, 0, 0, 0, m3R, 0, m3G, 0, m3B, 0)
    maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());

    // Upscale from 0..31 to 0..32
    // (allows the division further down to be replaced by a right shift)
    // Right-shift each component by 4 and add the result back to that component,
    // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
    maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
    maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));

    // Multiply each component of maskLo and maskHi by srcA
    maskLo = _mm_mullo_epi16(maskLo, srcA);
    maskHi = _mm_mullo_epi16(maskHi, srcA);

    // Right shift mask components by 8 (divide by 256)
    maskLo = _mm_srli_epi16(maskLo, 8);
    maskHi = _mm_srli_epi16(maskHi, 8);

    // Interleave R,G,B into the lower byte of the word
    // dstLo = (0, 0, d0R, 0, d0G, 0, d0B, 0, 0, 0, d1R, 0, d1G, 0, d1B, 0)
    __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
    // dstHi = (0, 0, d2R, 0, d2G, 0, d2B, 0, 0, 0, d3R, 0, d3G, 0, d3B, 0)
    __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());

    // mask = (src - dst) * mask
    maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
    maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));

    // mask = (src - dst) * mask >> 5
    // (arithmetic shift, because src - dst may be negative)
    maskLo = _mm_srai_epi16(maskLo, 5);
    maskHi = _mm_srai_epi16(maskHi, 5);

    // Add two pixels into result.
    // result = dst + ((src - dst) * mask >> 5)
    __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
    __m128i resultHi = _mm_add_epi16(dstHi, maskHi);

    // Pack into 4 32bit dst pixels.
    // resultLo and resultHi contain eight 16-bit components (two pixels) each.
    // Merge into one SSE register with sixteen 8-bit values (four pixels),
    // clamping to 255 if necessary.
    return _mm_packus_epi16(resultLo, resultHi);
}
220
// Blends four destination pixels towards an opaque src in one go, using four
// LCD16 coverage masks. Per channel this computes
// dst + ((src - dst) * weight >> 5), where weight is the 5-bit coverage
// widened to 0..32. The alpha byte of every returned pixel is forced to 0xFF.
//   src   source color as 16-bit lanes, two pixels wide (layout below)
//   dst   four 32-bit destination pixels
//   mask  four 16-bit masks, one in the low half of each 32-bit lane;
//         NOTE: passed by reference and overwritten as scratch space
static __m128i blend_lcd16_opaque_sse2(__m128i &src, __m128i &dst, __m128i &mask) {
    // In the following comments, the components of src, dst and mask are
    // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
    // by an R, G, B, or A suffix. Components of one of the four pixels that
    // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
    // example is the blue channel of the second destination pixel. Memory
    // layout is shown for an ARGB byte order in a color value.

    // src stores 8-bit values interleaved with zeros.
    // src  = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
    // mask stores 16-bit values (shown as high and low bytes) interleaved with
    // zeros
    // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
    //         m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)

    // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
    // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
    __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
                              _mm_set1_epi32(0x1F << SK_R32_SHIFT));

    // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
    __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
                              _mm_set1_epi32(0x1F << SK_G32_SHIFT));

    // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
    __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
                              _mm_set1_epi32(0x1F << SK_B32_SHIFT));

    // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
    // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
    // 8-bit position
    // mask = (0, m0R, m0G, m0B, 0, m1R, m1G, m1B,
    //         0, m2R, m2G, m2B, 0, m3R, m3G, m3B)
    mask = _mm_or_si128(_mm_or_si128(r, g), b);

    // Interleave R,G,B into the lower byte of word.
    // i.e. split the sixteen 8-bit values from mask into two sets of eight
    // 16-bit values, padded by zero.
    __m128i maskLo, maskHi;
    // maskLo = (0, 0, m0R, 0, m0G, 0, m0B, 0, 0, 0, m1R, 0, m1G, 0, m1B, 0)
    maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
    // maskHi = (0, 0, m2R, 0, m2G, 0, m2B, 0, 0, 0, m3R, 0, m3G, 0, m3B, 0)
    maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());

    // Upscale from 0..31 to 0..32
    // (allows the division further down to be replaced by a right shift)
    // Right-shift each component by 4 and add the result back to that component,
    // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
    maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
    maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));

    // Interleave R,G,B into the lower byte of the word
    // dstLo = (0, 0, d0R, 0, d0G, 0, d0B, 0, 0, 0, d1R, 0, d1G, 0, d1B, 0)
    __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
    // dstHi = (0, 0, d2R, 0, d2G, 0, d2B, 0, 0, 0, d3R, 0, d3G, 0, d3B, 0)
    __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());

    // mask = (src - dst) * mask
    maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
    maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));

    // mask = (src - dst) * mask >> 5
    // (arithmetic shift, because src - dst may be negative)
    maskLo = _mm_srai_epi16(maskLo, 5);
    maskHi = _mm_srai_epi16(maskHi, 5);

    // Add two pixels into result.
    // result = dst + ((src - dst) * mask >> 5)
    __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
    __m128i resultHi = _mm_add_epi16(dstHi, maskHi);

    // Pack into 4 32bit dst pixels and force opaque.
    // resultLo and resultHi contain eight 16-bit components (two pixels) each.
    // Merge into one SSE register with sixteen 8-bit values (four pixels),
    // clamping to 255 if necessary. Set alpha components to 0xFF.
    return _mm_or_si128(_mm_packus_epi16(resultLo, resultHi),
                        _mm_set1_epi32(SK_A32_MASK << SK_A32_SHIFT));
}
298
// SSE2 row blitter for LCD16 masks with a translucent source color.
//   dst    destination pixels, updated in place
//   mask   one packed 16-bit coverage value per destination pixel
//   src    source color; its alpha is folded into the coverage
//   width  number of pixels in the row; values <= 0 do nothing
// The trailing unnamed SkPMColor is unused here; it keeps the signature
// identical to blit_row_lcd16_opaque.
// Rows of at least 4 pixels are handled in three phases: scalar pixels until
// dst reaches 16-byte alignment, groups of 4 pixels with SSE2, then scalar
// pixels for whatever is left.
void blit_row_lcd16(SkPMColor dst[], const uint16_t mask[], SkColor src, int width, SkPMColor) {
    if (width <= 0) {
        return;
    }

    int srcA = SkColorGetA(src);
    int srcR = SkColorGetR(src);
    int srcG = SkColorGetG(src);
    int srcB = SkColorGetB(src);

    // blend_lcd16 and blend_lcd16_sse2 expect the alpha rescaled to 0..256.
    srcA = SkAlpha255To256(srcA);

    if (width >= 4) {
        // dst is 4-byte aligned, so at most three scalar pixels are needed to
        // reach the 16-byte boundary required by the aligned load/store below.
        SkASSERT(((size_t)dst & 0x03) == 0);
        while (((size_t)dst & 0x0F) != 0) {
            *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
            mask++;
            dst++;
            width--;
        }

        __m128i *d = reinterpret_cast<__m128i*>(dst);
        // Set alpha to 0xFF and replicate source four times in SSE register.
        __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
        // Interleave with zeros to get two sets of four 16-bit values.
        // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
        src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
        // Set srcA_sse to contain eight 16-bit copies of srcA.
        __m128i srcA_sse = _mm_set1_epi16(srcA);
        while (width >= 4) {
            // Load four destination pixels into dst_sse.
            __m128i dst_sse = _mm_load_si128(d);
            // Load four 16-bit masks into lower half of mask_sse.
            __m128i mask_sse = _mm_loadl_epi64(
                                   reinterpret_cast<const __m128i*>(mask));

            // Check whether masks are equal to 0 and get the highest bit
            // of each byte of result, if masks are all zero, we will get
            // pack_cmp to 0xFFFF
            int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
                                             _mm_setzero_si128()));

            // if mask pixels are not all zero, we will blend the dst pixels
            if (pack_cmp != 0xFFFF) {
                // Unpack 4 16bit mask pixels to
                // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
                //             m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
                mask_sse = _mm_unpacklo_epi16(mask_sse,
                                              _mm_setzero_si128());

                // Process 4 32bit dst pixels
                __m128i result = blend_lcd16_sse2(src_sse, dst_sse, mask_sse, srcA_sse);
                _mm_store_si128(d, result);
            }

            d++;
            mask += 4;
            width -= 4;
        }

        dst = reinterpret_cast<SkPMColor*>(d);
    }

    // Scalar tail: rows shorter than 4 pixels, or the pixels left after the
    // last full group of 4.
    while (width > 0) {
        *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
        mask++;
        dst++;
        width--;
    }
}
369
// SSE2 row blitter for LCD16 masks with a fully opaque source color.
//   dst        destination pixels, updated in place
//   mask       one packed 16-bit coverage value per destination pixel
//   src        source color; only its R, G and B channels are read here
//   width      number of pixels in the row; values <= 0 do nothing
//   opaqueDst  pixel value used by the scalar path when a mask is 0xFFFF
// Rows of at least 4 pixels are handled in three phases: scalar pixels until
// dst reaches 16-byte alignment, groups of 4 pixels with SSE2, then scalar
// pixels for whatever is left.
void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t mask[],
                           SkColor src, int width, SkPMColor opaqueDst) {
    if (width <= 0) {
        return;
    }

    int srcR = SkColorGetR(src);
    int srcG = SkColorGetG(src);
    int srcB = SkColorGetB(src);

    if (width >= 4) {
        // dst is 4-byte aligned, so at most three scalar pixels are needed to
        // reach the 16-byte boundary required by the aligned load/store below.
        SkASSERT(((size_t)dst & 0x03) == 0);
        while (((size_t)dst & 0x0F) != 0) {
            *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
            mask++;
            dst++;
            width--;
        }

        __m128i *d = reinterpret_cast<__m128i*>(dst);
        // Set alpha to 0xFF and replicate source four times in SSE register.
        __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
        // Interleave with zeros to get two sets of four 16-bit values.
        // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
        src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
        while (width >= 4) {
            // Load four destination pixels into dst_sse.
            __m128i dst_sse = _mm_load_si128(d);
            // Load four 16-bit masks into lower half of mask_sse.
            __m128i mask_sse = _mm_loadl_epi64(
                                   reinterpret_cast<const __m128i*>(mask));

            // Check whether masks are equal to 0 and get the highest bit
            // of each byte of result, if masks are all zero, we will get
            // pack_cmp to 0xFFFF
            int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
                                             _mm_setzero_si128()));

            // if mask pixels are not all zero, we will blend the dst pixels
            if (pack_cmp != 0xFFFF) {
                // Unpack 4 16bit mask pixels to
                // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
                //             m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
                mask_sse = _mm_unpacklo_epi16(mask_sse,
                                              _mm_setzero_si128());

                // Process 4 32bit dst pixels
                __m128i result = blend_lcd16_opaque_sse2(src_sse, dst_sse, mask_sse);
                _mm_store_si128(d, result);
            }

            d++;
            mask += 4;
            width -= 4;
        }

        dst = reinterpret_cast<SkPMColor*>(d);
    }

    // Scalar tail: rows shorter than 4 pixels, or the pixels left after the
    // last full group of 4.
    while (width > 0) {
        *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
        mask++;
        dst++;
        width--;
    }
}
436
437 #elif defined(SK_ARM_HAS_NEON)
438 #include <arm_neon.h>
439
// Index of each channel inside the uint8x8x4_t that vld4_u8 produces from
// 32-bit pixels: the channel's bit shift within the pixel divided by 8.
// NOTE(review): this presumes byte 0 in memory holds bits 0..7 of the pixel
// (little-endian) -- confirm for the targeted platforms.
#define NEON_A (SK_A32_SHIFT / 8)
#define NEON_R (SK_R32_SHIFT / 8)
#define NEON_G (SK_G32_SHIFT / 8)
#define NEON_B (SK_B32_SHIFT / 8)
444
// Eight-lane counterpart of blend_32: for every lane computes
// dst + ((src - dst) * scale >> 5) in signed 16-bit arithmetic and narrows
// the result back to 8 bits.
//   src, dst  eight 8-bit channel values each
//   scale     eight 16-bit blend weights
static inline uint8x8_t blend_32_neon(uint8x8_t src, uint8x8_t dst, uint16x8_t scale) {
    int16x8_t src_wide, dst_wide;

    // Widen to 16 bits and reinterpret as signed so that src - dst can be
    // negative.
    src_wide = vreinterpretq_s16_u16(vmovl_u8(src));
    dst_wide = vreinterpretq_s16_u16(vmovl_u8(dst));

    src_wide = (src_wide - dst_wide) * vreinterpretq_s16_u16(scale);

    // Signed shift, so negative deltas keep their sign.
    dst_wide += vshrq_n_s16(src_wide, 5);

    // vmovn keeps the low 8 bits of every lane (no saturation).
    return vmovn_u16(vreinterpretq_u16_s16(dst_wide));
}
457
// NEON row blitter for LCD16 masks with a fully opaque source color.
//   dst        destination pixels, updated in place
//   src        one packed 16-bit coverage mask per destination pixel
//   color      source color; only its R, G and B channels are read here
//   width      number of pixels in the row
//   opaqueDst  pixel value written wherever a mask equals 0xFFFF
// Pixels are processed eight at a time; the remainder goes through the scalar
// blend_lcd16_opaque.
void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t src[],
                           SkColor color, int width,
                           SkPMColor opaqueDst) {
    int colR = SkColorGetR(color);
    int colG = SkColorGetG(color);
    int colB = SkColorGetB(color);

    // Broadcast the source channels and the opaqueDst channels to all lanes.
    uint8x8_t vcolR = vdup_n_u8(colR);
    uint8x8_t vcolG = vdup_n_u8(colG);
    uint8x8_t vcolB = vdup_n_u8(colB);
    uint8x8_t vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
    uint8x8_t vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
    uint8x8_t vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
    uint8x8_t vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));

    while (width >= 8) {
        uint8x8x4_t vdst;
        uint16x8_t vmask;
        uint16x8_t vmaskR, vmaskG, vmaskB;
        uint8x8_t vsel_trans, vsel_opq;

        // De-interleave eight pixels into four per-channel byte vectors.
        vdst = vld4_u8((uint8_t*)dst);
        vmask = vld1q_u16(src);

        // Prepare compare masks: all-ones lanes where the coverage mask is 0
        // (vsel_trans) or 0xFFFF (vsel_opq).
        vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
        vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));

        // Get all the color masks on 5 bits
        // NOTE(review): the green extraction shifts red out at the top, then
        // drops blue plus the lowest green bit; this presumes red occupies
        // the top bits, blue the bottom bits and green is 6 bits wide --
        // confirm against the SK_*16_* definitions.
        vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
        vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
                             SK_B16_BITS + SK_R16_BITS + 1);
        vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);

        // Upscale to 0..32
        vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
        vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
        vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);

        // Alpha: keep dst's alpha where the mask is 0, use 0xFF elsewhere,
        // then take opaqueDst's alpha where the mask is 0xFFFF.
        vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF));
        vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);

        vdst.val[NEON_R] = blend_32_neon(vcolR, vdst.val[NEON_R], vmaskR);
        vdst.val[NEON_G] = blend_32_neon(vcolG, vdst.val[NEON_G], vmaskG);
        vdst.val[NEON_B] = blend_32_neon(vcolB, vdst.val[NEON_B], vmaskB);

        // Fully covered pixels take opaqueDst's channels verbatim.
        vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
        vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
        vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);

        vst4_u8((uint8_t*)dst, vdst);

        dst += 8;
        src += 8;
        width -= 8;
    }

    // Leftovers
    for (int i = 0; i < width; i++) {
        dst[i] = blend_lcd16_opaque(colR, colG, colB, dst[i], src[i], opaqueDst);
    }
}
520
// NEON row blitter for LCD16 masks with a translucent source color.
//   dst    destination pixels, updated in place
//   src    one packed 16-bit coverage mask per destination pixel
//   color  source color; its alpha is folded into the coverage
//   width  number of pixels in the row
// The trailing unnamed SkPMColor is unused here; it keeps the signature
// identical to blit_row_lcd16_opaque.
// Pixels are processed eight at a time; the remainder goes through the scalar
// blend_lcd16.
// NOTE(review): the vector loop writes alpha 0xFF for every pixel, while the
// scalar blend_lcd16 leaves a pixel completely untouched when its mask is 0.
// The two paths only agree if dst is already opaque -- confirm with callers.
void blit_row_lcd16(SkPMColor dst[], const uint16_t src[],
                    SkColor color, int width, SkPMColor) {
    int colA = SkColorGetA(color);
    int colR = SkColorGetR(color);
    int colG = SkColorGetG(color);
    int colB = SkColorGetB(color);

    // The blend expects the alpha rescaled to 0..256.
    colA = SkAlpha255To256(colA);

    uint16x8_t vcolA = vdupq_n_u16(colA);
    uint8x8_t vcolR = vdup_n_u8(colR);
    uint8x8_t vcolG = vdup_n_u8(colG);
    uint8x8_t vcolB = vdup_n_u8(colB);

    while (width >= 8) {
        uint8x8x4_t vdst;
        uint16x8_t vmask;
        uint16x8_t vmaskR, vmaskG, vmaskB;

        // De-interleave eight pixels into four per-channel byte vectors.
        vdst = vld4_u8((uint8_t*)dst);
        vmask = vld1q_u16(src);

        // Get all the color masks on 5 bits
        // NOTE(review): the green extraction presumes red occupies the top
        // bits, blue the bottom bits and green is 6 bits wide -- confirm
        // against the SK_*16_* definitions.
        vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
        vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
                             SK_B16_BITS + SK_R16_BITS + 1);
        vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);

        // Upscale to 0..32
        vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
        vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
        vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);

        // Fold the source alpha (0..256) into the coverage: weight * colA >> 8.
        vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
        vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
        vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);

        vdst.val[NEON_A] = vdup_n_u8(0xFF);
        vdst.val[NEON_R] = blend_32_neon(vcolR, vdst.val[NEON_R], vmaskR);
        vdst.val[NEON_G] = blend_32_neon(vcolG, vdst.val[NEON_G], vmaskG);
        vdst.val[NEON_B] = blend_32_neon(vcolB, vdst.val[NEON_B], vmaskB);

        vst4_u8((uint8_t*)dst, vdst);

        dst += 8;
        src += 8;
        width -= 8;
    }

    // Leftovers (fewer than eight pixels).
    for (int i = 0; i < width; i++) {
        dst[i] = blend_lcd16(colA, colR, colG, colB, dst[i], src[i]);
    }
}
574
575 #else
576
blit_row_lcd16(SkPMColor dst[],const uint16_t mask[],SkColor src,int width,SkPMColor)577 static inline void blit_row_lcd16(SkPMColor dst[], const uint16_t mask[],
578 SkColor src, int width, SkPMColor) {
579 int srcA = SkColorGetA(src);
580 int srcR = SkColorGetR(src);
581 int srcG = SkColorGetG(src);
582 int srcB = SkColorGetB(src);
583
584 srcA = SkAlpha255To256(srcA);
585
586 for (int i = 0; i < width; i++) {
587 dst[i] = blend_lcd16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
588 }
589 }
590
blit_row_lcd16_opaque(SkPMColor dst[],const uint16_t mask[],SkColor src,int width,SkPMColor opaqueDst)591 static inline void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t mask[],
592 SkColor src, int width,
593 SkPMColor opaqueDst) {
594 int srcR = SkColorGetR(src);
595 int srcG = SkColorGetG(src);
596 int srcB = SkColorGetB(src);
597
598 for (int i = 0; i < width; i++) {
599 dst[i] = blend_lcd16_opaque(srcR, srcG, srcB, dst[i], mask[i], opaqueDst);
600 }
601 }
602
603 #endif
604
blit_color(const SkPixmap & device,const SkMask & mask,const SkIRect & clip,SkColor color)605 static bool blit_color(const SkPixmap& device,
606 const SkMask& mask,
607 const SkIRect& clip,
608 SkColor color) {
609 int x = clip.fLeft,
610 y = clip.fTop;
611
612 if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kA8_Format) {
613 SkOpts::blit_mask_d32_a8(device.writable_addr32(x,y), device.rowBytes(),
614 (const SkAlpha*)mask.getAddr(x,y), mask.fRowBytes,
615 color, clip.width(), clip.height());
616 return true;
617 }
618
619 if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kLCD16_Format) {
620 auto dstRow = device.writable_addr32(x,y);
621 auto maskRow = (const uint16_t*)mask.getAddr(x,y);
622
623 auto blit_row = blit_row_lcd16;
624 SkPMColor opaqueDst = 0; // ignored unless opaque
625
626 if (0xff == SkColorGetA(color)) {
627 blit_row = blit_row_lcd16_opaque;
628 opaqueDst = SkPreMultiplyColor(color);
629 }
630
631 for (int height = clip.height(); height --> 0; ) {
632 blit_row(dstRow, maskRow, color, clip.width(), opaqueDst);
633
634 dstRow = (SkPMColor*) (( char*) dstRow + device.rowBytes());
635 maskRow = (const uint16_t*)((const char*)maskRow + mask.fRowBytes);
636 }
637 return true;
638 }
639
640 return false;
641 }
642
643 ///////////////////////////////////////////////////////////////////////////////
644
SkARGB32_Blit32(const SkPixmap & device,const SkMask & mask,const SkIRect & clip,SkPMColor srcColor)645 static void SkARGB32_Blit32(const SkPixmap& device, const SkMask& mask,
646 const SkIRect& clip, SkPMColor srcColor) {
647 U8CPU alpha = SkGetPackedA32(srcColor);
648 unsigned flags = SkBlitRow::kSrcPixelAlpha_Flag32;
649 if (alpha != 255) {
650 flags |= SkBlitRow::kGlobalAlpha_Flag32;
651 }
652 SkBlitRow::Proc32 proc = SkBlitRow::Factory32(flags);
653
654 int x = clip.fLeft;
655 int y = clip.fTop;
656 int width = clip.width();
657 int height = clip.height();
658
659 SkPMColor* dstRow = device.writable_addr32(x, y);
660 const SkPMColor* srcRow = reinterpret_cast<const SkPMColor*>(mask.getAddr8(x, y));
661
662 do {
663 proc(dstRow, srcRow, width, alpha);
664 dstRow = (SkPMColor*)((char*)dstRow + device.rowBytes());
665 srcRow = (const SkPMColor*)((const char*)srcRow + mask.fRowBytes);
666 } while (--height != 0);
667 }
668
669 //////////////////////////////////////////////////////////////////////////////////////
670
SkARGB32_Blitter(const SkPixmap & device,const SkPaint & paint)671 SkARGB32_Blitter::SkARGB32_Blitter(const SkPixmap& device, const SkPaint& paint)
672 : INHERITED(device) {
673 SkColor color = paint.getColor();
674 fColor = color;
675
676 fSrcA = SkColorGetA(color);
677 unsigned scale = SkAlpha255To256(fSrcA);
678 fSrcR = SkAlphaMul(SkColorGetR(color), scale);
679 fSrcG = SkAlphaMul(SkColorGetG(color), scale);
680 fSrcB = SkAlphaMul(SkColorGetB(color), scale);
681
682 fPMColor = SkPackARGB32(fSrcA, fSrcR, fSrcG, fSrcB);
683 }
684
justAnOpaqueColor(uint32_t * value)685 const SkPixmap* SkARGB32_Blitter::justAnOpaqueColor(uint32_t* value) {
686 if (255 == fSrcA) {
687 *value = fPMColor;
688 return &fDevice;
689 }
690 return nullptr;
691 }
692
693 #if defined _WIN32 // disable warning : local variable used without having been initialized
694 #pragma warning ( push )
695 #pragma warning ( disable : 4701 )
696 #endif
697
blitH(int x,int y,int width)698 void SkARGB32_Blitter::blitH(int x, int y, int width) {
699 SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width());
700
701 uint32_t* device = fDevice.writable_addr32(x, y);
702 SkBlitRow::Color32(device, device, width, fPMColor);
703 }
704
blitAntiH(int x,int y,const SkAlpha antialias[],const int16_t runs[])705 void SkARGB32_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
706 const int16_t runs[]) {
707 if (fSrcA == 0) {
708 return;
709 }
710
711 uint32_t color = fPMColor;
712 uint32_t* device = fDevice.writable_addr32(x, y);
713 unsigned opaqueMask = fSrcA; // if fSrcA is 0xFF, then we will catch the fast opaque case
714
715 for (;;) {
716 int count = runs[0];
717 SkASSERT(count >= 0);
718 if (count <= 0) {
719 return;
720 }
721 unsigned aa = antialias[0];
722 if (aa) {
723 if ((opaqueMask & aa) == 255) {
724 sk_memset32(device, color, count);
725 } else {
726 uint32_t sc = SkAlphaMulQ(color, SkAlpha255To256(aa));
727 SkBlitRow::Color32(device, device, count, sc);
728 }
729 }
730 runs += count;
731 antialias += count;
732 device += count;
733 }
734 }
735
blitAntiH2(int x,int y,U8CPU a0,U8CPU a1)736 void SkARGB32_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
737 uint32_t* device = fDevice.writable_addr32(x, y);
738 SkDEBUGCODE((void)fDevice.writable_addr32(x + 1, y);)
739
740 device[0] = SkBlendARGB32(fPMColor, device[0], a0);
741 device[1] = SkBlendARGB32(fPMColor, device[1], a1);
742 }
743
blitAntiV2(int x,int y,U8CPU a0,U8CPU a1)744 void SkARGB32_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
745 uint32_t* device = fDevice.writable_addr32(x, y);
746 SkDEBUGCODE((void)fDevice.writable_addr32(x, y + 1);)
747
748 device[0] = SkBlendARGB32(fPMColor, device[0], a0);
749 device = (uint32_t*)((char*)device + fDevice.rowBytes());
750 device[0] = SkBlendARGB32(fPMColor, device[0], a1);
751 }
752
753 //////////////////////////////////////////////////////////////////////////////////////
754
// Writes `color` into dst[i] for every set bit of an 8-bit coverage mask.
// Bit 7 (0x80) controls dst[0] and bit 0 (0x01) controls dst[7]; pixels whose
// bit is clear are left untouched.
// Every argument is parenthesized so that compound expressions (for example
// `a | b` as the mask or `p + n` as dst) expand with the intended precedence.
// Note that mask, dst and color are each evaluated more than once.
#define solid_8_pixels(mask, dst, color)            \
    do {                                            \
        if ((mask) & 0x80) (dst)[0] = (color);      \
        if ((mask) & 0x40) (dst)[1] = (color);      \
        if ((mask) & 0x20) (dst)[2] = (color);      \
        if ((mask) & 0x10) (dst)[3] = (color);      \
        if ((mask) & 0x08) (dst)[4] = (color);      \
        if ((mask) & 0x04) (dst)[5] = (color);      \
        if ((mask) & 0x02) (dst)[6] = (color);      \
        if ((mask) & 0x01) (dst)[7] = (color);      \
    } while (0)
766
// Parameters for SkBlitBWMaskTemplate.h, which is included once per variant.
// This inclusion is expected to produce SkARGB32_BlitBW(device, mask, clip,
// color), the 1-bit-mask blitter that overwrites covered pixels with `color`
// (see its call in SkARGB32_Opaque_Blitter::blitMask).
// NOTE(review): the exact expansion lives in the template header -- confirm
// there, including whether it #undefs these macros after use.
#define SK_BLITBWMASK_NAME SkARGB32_BlitBW
#define SK_BLITBWMASK_ARGS , SkPMColor color
#define SK_BLITBWMASK_BLIT8(mask, dst) solid_8_pixels(mask, dst, color)
#define SK_BLITBWMASK_GETADDR writable_addr32
#define SK_BLITBWMASK_DEVTYPE uint32_t
#include "SkBlitBWMaskTemplate.h"
773
// For every set bit of an 8-bit coverage mask, replaces dst[i] with
// sc + SkAlphaMulQ(dst[i], dst_scale), i.e. the pre-scaled source color plus
// the destination scaled down by dst_scale. Bit 7 (0x80) controls dst[0] and
// bit 0 (0x01) controls dst[7]; pixels whose bit is clear are left untouched.
// Every argument is parenthesized so that compound expressions (for example
// `a | b` as the mask or `p + n` as dst) expand with the intended precedence.
// Note that the arguments are evaluated more than once.
#define blend_8_pixels(mask, dst, sc, dst_scale)                                         \
    do {                                                                                 \
        if ((mask) & 0x80) { (dst)[0] = (sc) + SkAlphaMulQ((dst)[0], (dst_scale)); }     \
        if ((mask) & 0x40) { (dst)[1] = (sc) + SkAlphaMulQ((dst)[1], (dst_scale)); }     \
        if ((mask) & 0x20) { (dst)[2] = (sc) + SkAlphaMulQ((dst)[2], (dst_scale)); }     \
        if ((mask) & 0x10) { (dst)[3] = (sc) + SkAlphaMulQ((dst)[3], (dst_scale)); }     \
        if ((mask) & 0x08) { (dst)[4] = (sc) + SkAlphaMulQ((dst)[4], (dst_scale)); }     \
        if ((mask) & 0x04) { (dst)[5] = (sc) + SkAlphaMulQ((dst)[5], (dst_scale)); }     \
        if ((mask) & 0x02) { (dst)[6] = (sc) + SkAlphaMulQ((dst)[6], (dst_scale)); }     \
        if ((mask) & 0x01) { (dst)[7] = (sc) + SkAlphaMulQ((dst)[7], (dst_scale)); }     \
    } while (0)
785
// Parameters for SkBlitBWMaskTemplate.h, which is included once per variant.
// This inclusion is expected to produce SkARGB32_BlendBW(device, mask, clip,
// sc, dst_scale), the 1-bit-mask blitter that blends the pre-scaled color
// `sc` over covered pixels (see its call in SkARGB32_Blitter::blitMask).
// NOTE(review): the exact expansion lives in the template header -- confirm
// there, including whether it #undefs these macros after use.
#define SK_BLITBWMASK_NAME SkARGB32_BlendBW
#define SK_BLITBWMASK_ARGS , uint32_t sc, unsigned dst_scale
#define SK_BLITBWMASK_BLIT8(mask, dst) blend_8_pixels(mask, dst, sc, dst_scale)
#define SK_BLITBWMASK_GETADDR writable_addr32
#define SK_BLITBWMASK_DEVTYPE uint32_t
#include "SkBlitBWMaskTemplate.h"
792
blitMask(const SkMask & mask,const SkIRect & clip)793 void SkARGB32_Blitter::blitMask(const SkMask& mask, const SkIRect& clip) {
794 SkASSERT(mask.fBounds.contains(clip));
795 SkASSERT(fSrcA != 0xFF);
796
797 if (fSrcA == 0) {
798 return;
799 }
800
801 if (blit_color(fDevice, mask, clip, fColor)) {
802 return;
803 }
804
805 switch (mask.fFormat) {
806 case SkMask::kBW_Format:
807 SkARGB32_BlendBW(fDevice, mask, clip, fPMColor, SkAlpha255To256(255 - fSrcA));
808 break;
809 case SkMask::kARGB32_Format:
810 SkARGB32_Blit32(fDevice, mask, clip, fPMColor);
811 break;
812 default:
813 SK_ABORT("Mask format not handled.");
814 }
815 }
816
blitMask(const SkMask & mask,const SkIRect & clip)817 void SkARGB32_Opaque_Blitter::blitMask(const SkMask& mask,
818 const SkIRect& clip) {
819 SkASSERT(mask.fBounds.contains(clip));
820
821 if (blit_color(fDevice, mask, clip, fColor)) {
822 return;
823 }
824
825 switch (mask.fFormat) {
826 case SkMask::kBW_Format:
827 SkARGB32_BlitBW(fDevice, mask, clip, fPMColor);
828 break;
829 case SkMask::kARGB32_Format:
830 SkARGB32_Blit32(fDevice, mask, clip, fPMColor);
831 break;
832 default:
833 SK_ABORT("Mask format not handled.");
834 }
835 }
836
blitAntiH2(int x,int y,U8CPU a0,U8CPU a1)837 void SkARGB32_Opaque_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
838 uint32_t* device = fDevice.writable_addr32(x, y);
839 SkDEBUGCODE((void)fDevice.writable_addr32(x + 1, y);)
840
841 device[0] = SkFastFourByteInterp(fPMColor, device[0], a0);
842 device[1] = SkFastFourByteInterp(fPMColor, device[1], a1);
843 }
844
blitAntiV2(int x,int y,U8CPU a0,U8CPU a1)845 void SkARGB32_Opaque_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
846 uint32_t* device = fDevice.writable_addr32(x, y);
847 SkDEBUGCODE((void)fDevice.writable_addr32(x, y + 1);)
848
849 device[0] = SkFastFourByteInterp(fPMColor, device[0], a0);
850 device = (uint32_t*)((char*)device + fDevice.rowBytes());
851 device[0] = SkFastFourByteInterp(fPMColor, device[0], a1);
852 }
853
854 ///////////////////////////////////////////////////////////////////////////////
855
blitV(int x,int y,int height,SkAlpha alpha)856 void SkARGB32_Blitter::blitV(int x, int y, int height, SkAlpha alpha) {
857 if (alpha == 0 || fSrcA == 0) {
858 return;
859 }
860
861 uint32_t* device = fDevice.writable_addr32(x, y);
862 uint32_t color = fPMColor;
863
864 if (alpha != 255) {
865 color = SkAlphaMulQ(color, SkAlpha255To256(alpha));
866 }
867
868 unsigned dst_scale = SkAlpha255To256(255 - SkGetPackedA32(color));
869 size_t rowBytes = fDevice.rowBytes();
870 while (--height >= 0) {
871 device[0] = color + SkAlphaMulQ(device[0], dst_scale);
872 device = (uint32_t*)((char*)device + rowBytes);
873 }
874 }
875
blitRect(int x,int y,int width,int height)876 void SkARGB32_Blitter::blitRect(int x, int y, int width, int height) {
877 SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width() && y + height <= fDevice.height());
878
879 if (fSrcA == 0) {
880 return;
881 }
882
883 uint32_t* device = fDevice.writable_addr32(x, y);
884 uint32_t color = fPMColor;
885 size_t rowBytes = fDevice.rowBytes();
886
887 while (--height >= 0) {
888 SkBlitRow::Color32(device, device, width, color);
889 device = (uint32_t*)((char*)device + rowBytes);
890 }
891 }
892
// Windows builds only: restore the compiler's saved warning state.
// NOTE(review): the matching '#pragma warning ( push )' is presumably earlier in this
// file - confirm before moving or removing this.
#if defined _WIN32
#pragma warning ( pop )
#endif
896
897 ///////////////////////////////////////////////////////////////////////
898
blitAntiH(int x,int y,const SkAlpha antialias[],const int16_t runs[])899 void SkARGB32_Black_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
900 const int16_t runs[]) {
901 uint32_t* device = fDevice.writable_addr32(x, y);
902 SkPMColor black = (SkPMColor)(SK_A32_MASK << SK_A32_SHIFT);
903
904 for (;;) {
905 int count = runs[0];
906 SkASSERT(count >= 0);
907 if (count <= 0) {
908 return;
909 }
910 unsigned aa = antialias[0];
911 if (aa) {
912 if (aa == 255) {
913 sk_memset32(device, black, count);
914 } else {
915 SkPMColor src = aa << SK_A32_SHIFT;
916 unsigned dst_scale = 256 - aa;
917 int n = count;
918 do {
919 --n;
920 device[n] = src + SkAlphaMulQ(device[n], dst_scale);
921 } while (n > 0);
922 }
923 }
924 runs += count;
925 antialias += count;
926 device += count;
927 }
928 }
929
blitAntiH2(int x,int y,U8CPU a0,U8CPU a1)930 void SkARGB32_Black_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
931 uint32_t* device = fDevice.writable_addr32(x, y);
932 SkDEBUGCODE((void)fDevice.writable_addr32(x + 1, y);)
933
934 device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0);
935 device[1] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[1], 256 - a1);
936 }
937
blitAntiV2(int x,int y,U8CPU a0,U8CPU a1)938 void SkARGB32_Black_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
939 uint32_t* device = fDevice.writable_addr32(x, y);
940 SkDEBUGCODE((void)fDevice.writable_addr32(x, y + 1);)
941
942 device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0);
943 device = (uint32_t*)((char*)device + fDevice.rowBytes());
944 device[0] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a1);
945 }
946
947 ///////////////////////////////////////////////////////////////////////////////
948
949 // Special version of SkBlitRow::Factory32 that knows we're in kSrc_Mode,
950 // instead of kSrcOver_Mode
blend_srcmode(SkPMColor * SK_RESTRICT device,const SkPMColor * SK_RESTRICT span,int count,U8CPU aa)951 static void blend_srcmode(SkPMColor* SK_RESTRICT device,
952 const SkPMColor* SK_RESTRICT span,
953 int count, U8CPU aa) {
954 int aa256 = SkAlpha255To256(aa);
955 for (int i = 0; i < count; ++i) {
956 device[i] = SkFourByteInterp256(span[i], device[i], aa256);
957 }
958 }
959
SkARGB32_Shader_Blitter(const SkPixmap & device,const SkPaint & paint,SkShaderBase::Context * shaderContext)960 SkARGB32_Shader_Blitter::SkARGB32_Shader_Blitter(const SkPixmap& device,
961 const SkPaint& paint, SkShaderBase::Context* shaderContext)
962 : INHERITED(device, paint, shaderContext)
963 {
964 fBuffer = (SkPMColor*)sk_malloc_throw(device.width() * (sizeof(SkPMColor)));
965
966 fXfermode = SkXfermode::Peek(paint.getBlendMode());
967
968 int flags = 0;
969 if (!(shaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag)) {
970 flags |= SkBlitRow::kSrcPixelAlpha_Flag32;
971 }
972 // we call this on the output from the shader
973 fProc32 = SkBlitRow::Factory32(flags);
974 // we call this on the output from the shader + alpha from the aa buffer
975 fProc32Blend = SkBlitRow::Factory32(flags | SkBlitRow::kGlobalAlpha_Flag32);
976
977 fShadeDirectlyIntoDevice = false;
978 if (fXfermode == nullptr) {
979 if (shaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag) {
980 fShadeDirectlyIntoDevice = true;
981 }
982 } else {
983 if (SkBlendMode::kSrc == paint.getBlendMode()) {
984 fShadeDirectlyIntoDevice = true;
985 fProc32Blend = blend_srcmode;
986 }
987 }
988
989 fConstInY = SkToBool(shaderContext->getFlags() & SkShaderBase::kConstInY32_Flag);
990 }
991
// Releases the scratch span buffer (fBuffer) that the constructor allocated.
SkARGB32_Shader_Blitter::~SkARGB32_Shader_Blitter() {
    sk_free(fBuffer);
}
995
blitH(int x,int y,int width)996 void SkARGB32_Shader_Blitter::blitH(int x, int y, int width) {
997 SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width());
998
999 uint32_t* device = fDevice.writable_addr32(x, y);
1000
1001 if (fShadeDirectlyIntoDevice) {
1002 fShaderContext->shadeSpan(x, y, device, width);
1003 } else {
1004 SkPMColor* span = fBuffer;
1005 fShaderContext->shadeSpan(x, y, span, width);
1006 if (fXfermode) {
1007 fXfermode->xfer32(device, span, width, nullptr);
1008 } else {
1009 fProc32(device, span, width, 255);
1010 }
1011 }
1012 }
1013
blitRect(int x,int y,int width,int height)1014 void SkARGB32_Shader_Blitter::blitRect(int x, int y, int width, int height) {
1015 SkASSERT(x >= 0 && y >= 0 &&
1016 x + width <= fDevice.width() && y + height <= fDevice.height());
1017
1018 uint32_t* device = fDevice.writable_addr32(x, y);
1019 size_t deviceRB = fDevice.rowBytes();
1020 auto* shaderContext = fShaderContext;
1021 SkPMColor* span = fBuffer;
1022
1023 if (fConstInY) {
1024 if (fShadeDirectlyIntoDevice) {
1025 // shade the first row directly into the device
1026 shaderContext->shadeSpan(x, y, device, width);
1027 span = device;
1028 while (--height > 0) {
1029 device = (uint32_t*)((char*)device + deviceRB);
1030 memcpy(device, span, width << 2);
1031 }
1032 } else {
1033 shaderContext->shadeSpan(x, y, span, width);
1034 SkXfermode* xfer = fXfermode;
1035 if (xfer) {
1036 do {
1037 xfer->xfer32(device, span, width, nullptr);
1038 y += 1;
1039 device = (uint32_t*)((char*)device + deviceRB);
1040 } while (--height > 0);
1041 } else {
1042 SkBlitRow::Proc32 proc = fProc32;
1043 do {
1044 proc(device, span, width, 255);
1045 y += 1;
1046 device = (uint32_t*)((char*)device + deviceRB);
1047 } while (--height > 0);
1048 }
1049 }
1050 return;
1051 }
1052
1053 if (fShadeDirectlyIntoDevice) {
1054 do {
1055 shaderContext->shadeSpan(x, y, device, width);
1056 y += 1;
1057 device = (uint32_t*)((char*)device + deviceRB);
1058 } while (--height > 0);
1059 } else {
1060 SkXfermode* xfer = fXfermode;
1061 if (xfer) {
1062 do {
1063 shaderContext->shadeSpan(x, y, span, width);
1064 xfer->xfer32(device, span, width, nullptr);
1065 y += 1;
1066 device = (uint32_t*)((char*)device + deviceRB);
1067 } while (--height > 0);
1068 } else {
1069 SkBlitRow::Proc32 proc = fProc32;
1070 do {
1071 shaderContext->shadeSpan(x, y, span, width);
1072 proc(device, span, width, 255);
1073 y += 1;
1074 device = (uint32_t*)((char*)device + deviceRB);
1075 } while (--height > 0);
1076 }
1077 }
1078 }
1079
blitAntiH(int x,int y,const SkAlpha antialias[],const int16_t runs[])1080 void SkARGB32_Shader_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
1081 const int16_t runs[]) {
1082 SkPMColor* span = fBuffer;
1083 uint32_t* device = fDevice.writable_addr32(x, y);
1084 auto* shaderContext = fShaderContext;
1085
1086 if (fXfermode && !fShadeDirectlyIntoDevice) {
1087 for (;;) {
1088 SkXfermode* xfer = fXfermode;
1089
1090 int count = *runs;
1091 if (count <= 0)
1092 break;
1093 int aa = *antialias;
1094 if (aa) {
1095 shaderContext->shadeSpan(x, y, span, count);
1096 if (aa == 255) {
1097 xfer->xfer32(device, span, count, nullptr);
1098 } else {
1099 // count is almost always 1
1100 for (int i = count - 1; i >= 0; --i) {
1101 xfer->xfer32(&device[i], &span[i], 1, antialias);
1102 }
1103 }
1104 }
1105 device += count;
1106 runs += count;
1107 antialias += count;
1108 x += count;
1109 }
1110 } else if (fShadeDirectlyIntoDevice ||
1111 (shaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag)) {
1112 for (;;) {
1113 int count = *runs;
1114 if (count <= 0) {
1115 break;
1116 }
1117 int aa = *antialias;
1118 if (aa) {
1119 if (aa == 255) {
1120 // cool, have the shader draw right into the device
1121 shaderContext->shadeSpan(x, y, device, count);
1122 } else {
1123 shaderContext->shadeSpan(x, y, span, count);
1124 fProc32Blend(device, span, count, aa);
1125 }
1126 }
1127 device += count;
1128 runs += count;
1129 antialias += count;
1130 x += count;
1131 }
1132 } else {
1133 for (;;) {
1134 int count = *runs;
1135 if (count <= 0) {
1136 break;
1137 }
1138 int aa = *antialias;
1139 if (aa) {
1140 shaderContext->shadeSpan(x, y, span, count);
1141 if (aa == 255) {
1142 fProc32(device, span, count, 255);
1143 } else {
1144 fProc32Blend(device, span, count, aa);
1145 }
1146 }
1147 device += count;
1148 runs += count;
1149 antialias += count;
1150 x += count;
1151 }
1152 }
1153 }
1154
blend_row_A8(SkPMColor * dst,const void * vmask,const SkPMColor * src,int n)1155 static void blend_row_A8(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
1156 auto mask = (const uint8_t*)vmask;
1157
1158 #ifdef SK_SUPPORT_LEGACY_A8_MASKBLITTER
1159 for (int i = 0; i < n; ++i) {
1160 if (mask[i]) {
1161 dst[i] = SkBlendARGB32(src[i], dst[i], mask[i]);
1162 }
1163 }
1164 #else
1165 Sk4px::MapDstSrcAlpha(n, dst, src, mask, [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
1166 const auto s_aa = s.approxMulDiv255(aa);
1167 return s_aa + d.approxMulDiv255(s_aa.alphas().inv());
1168 });
1169 #endif
1170 }
1171
blend_row_A8_opaque(SkPMColor * dst,const void * vmask,const SkPMColor * src,int n)1172 static void blend_row_A8_opaque(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
1173 auto mask = (const uint8_t*)vmask;
1174
1175 #ifdef SK_SUPPORT_LEGACY_A8_MASKBLITTER
1176 for (int i = 0; i < n; ++i) {
1177 if (int m = mask[i]) {
1178 m += (m >> 7);
1179 dst[i] = SkAlphaMulQ(src[i], m) + SkAlphaMulQ(dst[i], 256 - m);
1180 }
1181 }
1182 #else
1183 Sk4px::MapDstSrcAlpha(n, dst, src, mask, [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
1184 return (s * aa + d * aa.inv()).div255();
1185 });
1186 #endif
1187 }
1188
blend_row_lcd16(SkPMColor * dst,const void * vmask,const SkPMColor * src,int n)1189 static void blend_row_lcd16(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
1190 auto src_alpha_blend = [](int s, int d, int sa, int m) {
1191 return d + SkAlphaMul(s - SkAlphaMul(sa, d), m);
1192 };
1193
1194 auto upscale_31_to_255 = [](int v) {
1195 return (v << 3) | (v >> 2);
1196 };
1197
1198 auto mask = (const uint16_t*)vmask;
1199 for (int i = 0; i < n; ++i) {
1200 uint16_t m = mask[i];
1201 if (0 == m) {
1202 continue;
1203 }
1204
1205 SkPMColor s = src[i];
1206 SkPMColor d = dst[i];
1207
1208 int srcA = SkGetPackedA32(s);
1209 int srcR = SkGetPackedR32(s);
1210 int srcG = SkGetPackedG32(s);
1211 int srcB = SkGetPackedB32(s);
1212
1213 srcA += srcA >> 7;
1214
1215 // We're ignoring the least significant bit of the green coverage channel here.
1216 int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5);
1217 int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5);
1218 int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
1219
1220 // Scale up to 8-bit coverage to work with SkAlphaMul() in src_alpha_blend().
1221 maskR = upscale_31_to_255(maskR);
1222 maskG = upscale_31_to_255(maskG);
1223 maskB = upscale_31_to_255(maskB);
1224
1225 // This LCD blit routine only works if the destination is opaque.
1226 dst[i] = SkPackARGB32(0xFF,
1227 src_alpha_blend(srcR, SkGetPackedR32(d), srcA, maskR),
1228 src_alpha_blend(srcG, SkGetPackedG32(d), srcA, maskG),
1229 src_alpha_blend(srcB, SkGetPackedB32(d), srcA, maskB));
1230 }
1231 }
1232
blend_row_LCD16_opaque(SkPMColor * dst,const void * vmask,const SkPMColor * src,int n)1233 static void blend_row_LCD16_opaque(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
1234 auto mask = (const uint16_t*)vmask;
1235
1236 for (int i = 0; i < n; ++i) {
1237 uint16_t m = mask[i];
1238 if (0 == m) {
1239 continue;
1240 }
1241
1242 SkPMColor s = src[i];
1243 SkPMColor d = dst[i];
1244
1245 int srcR = SkGetPackedR32(s);
1246 int srcG = SkGetPackedG32(s);
1247 int srcB = SkGetPackedB32(s);
1248
1249 // We're ignoring the least significant bit of the green coverage channel here.
1250 int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5);
1251 int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5);
1252 int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
1253
1254 // Now upscale them to 0..32, so we can use blend_32.
1255 maskR = upscale_31_to_32(maskR);
1256 maskG = upscale_31_to_32(maskG);
1257 maskB = upscale_31_to_32(maskB);
1258
1259 // This LCD blit routine only works if the destination is opaque.
1260 dst[i] = SkPackARGB32(0xFF,
1261 blend_32(srcR, SkGetPackedR32(d), maskR),
1262 blend_32(srcG, SkGetPackedG32(d), maskG),
1263 blend_32(srcB, SkGetPackedB32(d), maskB));
1264 }
1265 }
1266
blitMask(const SkMask & mask,const SkIRect & clip)1267 void SkARGB32_Shader_Blitter::blitMask(const SkMask& mask, const SkIRect& clip) {
1268 // we only handle kA8 with an xfermode
1269 if (fXfermode && (SkMask::kA8_Format != mask.fFormat)) {
1270 this->INHERITED::blitMask(mask, clip);
1271 return;
1272 }
1273
1274 SkASSERT(mask.fBounds.contains(clip));
1275
1276 void (*blend_row)(SkPMColor*, const void* mask, const SkPMColor*, int) = nullptr;
1277
1278 if (!fXfermode) {
1279 bool opaque = (fShaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag);
1280
1281 if (mask.fFormat == SkMask::kA8_Format && opaque) {
1282 blend_row = blend_row_A8_opaque;
1283 } else if (mask.fFormat == SkMask::kA8_Format) {
1284 blend_row = blend_row_A8;
1285 } else if (mask.fFormat == SkMask::kLCD16_Format && opaque) {
1286 blend_row = blend_row_LCD16_opaque;
1287 } else if (mask.fFormat == SkMask::kLCD16_Format) {
1288 blend_row = blend_row_lcd16;
1289 } else {
1290 this->INHERITED::blitMask(mask, clip);
1291 return;
1292 }
1293 }
1294
1295 const int x = clip.fLeft;
1296 const int width = clip.width();
1297 int y = clip.fTop;
1298 int height = clip.height();
1299
1300 char* dstRow = (char*)fDevice.writable_addr32(x, y);
1301 const size_t dstRB = fDevice.rowBytes();
1302 const uint8_t* maskRow = (const uint8_t*)mask.getAddr(x, y);
1303 const size_t maskRB = mask.fRowBytes;
1304
1305 SkPMColor* span = fBuffer;
1306
1307 if (fXfermode) {
1308 SkASSERT(SkMask::kA8_Format == mask.fFormat);
1309 SkXfermode* xfer = fXfermode;
1310 do {
1311 fShaderContext->shadeSpan(x, y, span, width);
1312 xfer->xfer32(reinterpret_cast<SkPMColor*>(dstRow), span, width, maskRow);
1313 dstRow += dstRB;
1314 maskRow += maskRB;
1315 y += 1;
1316 } while (--height > 0);
1317 } else {
1318 SkASSERT(blend_row);
1319 do {
1320 fShaderContext->shadeSpan(x, y, span, width);
1321 blend_row(reinterpret_cast<SkPMColor*>(dstRow), maskRow, span, width);
1322 dstRow += dstRB;
1323 maskRow += maskRB;
1324 y += 1;
1325 } while (--height > 0);
1326 }
1327 }
1328
blitV(int x,int y,int height,SkAlpha alpha)1329 void SkARGB32_Shader_Blitter::blitV(int x, int y, int height, SkAlpha alpha) {
1330 SkASSERT(x >= 0 && y >= 0 && y + height <= fDevice.height());
1331
1332 uint32_t* device = fDevice.writable_addr32(x, y);
1333 size_t deviceRB = fDevice.rowBytes();
1334
1335 if (fConstInY) {
1336 SkPMColor c;
1337 fShaderContext->shadeSpan(x, y, &c, 1);
1338
1339 if (fShadeDirectlyIntoDevice) {
1340 if (255 == alpha) {
1341 do {
1342 *device = c;
1343 device = (uint32_t*)((char*)device + deviceRB);
1344 } while (--height > 0);
1345 } else {
1346 do {
1347 *device = SkFourByteInterp(c, *device, alpha);
1348 device = (uint32_t*)((char*)device + deviceRB);
1349 } while (--height > 0);
1350 }
1351 } else {
1352 SkXfermode* xfer = fXfermode;
1353 if (xfer) {
1354 do {
1355 xfer->xfer32(device, &c, 1, &alpha);
1356 device = (uint32_t*)((char*)device + deviceRB);
1357 } while (--height > 0);
1358 } else {
1359 SkBlitRow::Proc32 proc = (255 == alpha) ? fProc32 : fProc32Blend;
1360 do {
1361 proc(device, &c, 1, alpha);
1362 device = (uint32_t*)((char*)device + deviceRB);
1363 } while (--height > 0);
1364 }
1365 }
1366 return;
1367 }
1368
1369 if (fShadeDirectlyIntoDevice) {
1370 if (255 == alpha) {
1371 do {
1372 fShaderContext->shadeSpan(x, y, device, 1);
1373 y += 1;
1374 device = (uint32_t*)((char*)device + deviceRB);
1375 } while (--height > 0);
1376 } else {
1377 do {
1378 SkPMColor c;
1379 fShaderContext->shadeSpan(x, y, &c, 1);
1380 *device = SkFourByteInterp(c, *device, alpha);
1381 y += 1;
1382 device = (uint32_t*)((char*)device + deviceRB);
1383 } while (--height > 0);
1384 }
1385 } else {
1386 SkPMColor* span = fBuffer;
1387 SkXfermode* xfer = fXfermode;
1388 if (xfer) {
1389 do {
1390 fShaderContext->shadeSpan(x, y, span, 1);
1391 xfer->xfer32(device, span, 1, &alpha);
1392 y += 1;
1393 device = (uint32_t*)((char*)device + deviceRB);
1394 } while (--height > 0);
1395 } else {
1396 SkBlitRow::Proc32 proc = (255 == alpha) ? fProc32 : fProc32Blend;
1397 do {
1398 fShaderContext->shadeSpan(x, y, span, 1);
1399 proc(device, span, 1, alpha);
1400 y += 1;
1401 device = (uint32_t*)((char*)device + deviceRB);
1402 } while (--height > 0);
1403 }
1404 }
1405 }
1406