• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include <arm_neon.h>
9 
// Names of the six matrix procs emitted by this template. MAKENAME is not
// defined here; it has to be supplied before this file is included.
#define SCALE_NOFILTER_NAME     MAKENAME(_nofilter_scale)
#define AFFINE_NOFILTER_NAME    MAKENAME(_nofilter_affine)
#define PERSP_NOFILTER_NAME     MAKENAME(_nofilter_persp)
#define SCALE_FILTER_NAME       MAKENAME(_filter_scale)
#define AFFINE_FILTER_NAME      MAKENAME(_filter_affine)
#define PERSP_FILTER_NAME       MAKENAME(_filter_persp)

// Names of the scalar and 4-lane helpers that pack filter coordinates.
#define PACK_FILTER_X_NAME      MAKENAME(_pack_filter_x)
#define PACK_FILTER_Y_NAME      MAKENAME(_pack_filter_y)
#define PACK_FILTER_X4_NAME     MAKENAME(_pack_filter_x4)
#define PACK_FILTER_Y4_NAME     MAKENAME(_pack_filter_y4)

// Optional hooks. When the includer does not define PREAMBLE, all five hooks
// expand to nothing, so the helpers take no extra parameters or arguments.
#if !defined(PREAMBLE)
    #define PREAMBLE(state)
    #define PREAMBLE_PARAM_X
    #define PREAMBLE_PARAM_Y
    #define PREAMBLE_ARG_X
    #define PREAMBLE_ARG_Y
#endif
29 
SCALE_NOFILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)30 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
31                                 uint32_t xy[], int count, int x, int y) {
32     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
33                              SkMatrix::kScale_Mask)) == 0);
34 
35     PREAMBLE(s);
36 
37     // we store y, x, x, x, x, x
38     const unsigned maxX = s.fPixmap.width() - 1;
39     SkFractionalInt fx;
40     {
41         const SkBitmapProcStateAutoMapper mapper(s, x, y);
42         const unsigned maxY = s.fPixmap.height() - 1;
43         *xy++ = TILEY_PROCF(mapper.fixedY(), maxY);
44         fx = mapper.fractionalIntX();
45     }
46 
47     if (0 == maxX) {
48         // all of the following X values must be 0
49         memset(xy, 0, count * sizeof(uint16_t));
50         return;
51     }
52 
53     const SkFractionalInt dx = s.fInvSxFractionalInt;
54 
55 #ifdef CHECK_FOR_DECAL
56     // test if we don't need to apply the tile proc
57     const SkFixed fixedFx = SkFractionalIntToFixed(fx);
58     const SkFixed fixedDx = SkFractionalIntToFixed(dx);
59     if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
60         decal_nofilter_scale_neon(xy, fixedFx, fixedDx, count);
61         return;
62     }
63 #endif
64 
65     if (count >= 8) {
66         SkFractionalInt dx2 = dx+dx;
67         SkFractionalInt dx4 = dx2+dx2;
68         SkFractionalInt dx8 = dx4+dx4;
69 
70         // now build fx/fx+dx/fx+2dx/fx+3dx
71         SkFractionalInt fx1, fx2, fx3;
72         int32x4_t lbase, hbase;
73         int16_t *dst16 = (int16_t *)xy;
74 
75         fx1 = fx+dx;
76         fx2 = fx1+dx;
77         fx3 = fx2+dx;
78 
79         lbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
80         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1);
81         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2);
82         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3);
83         hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
84 
85         // store & bump
86         while (count >= 8) {
87 
88             int16x8_t fx8;
89 
90             fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX);
91 
92             vst1q_s16(dst16, fx8);
93 
94             // but preserving base & on to the next
95             lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
96             hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
97             dst16 += 8;
98             count -= 8;
99             fx += dx8;
100         };
101         xy = (uint32_t *) dst16;
102     }
103 
104     uint16_t* xx = (uint16_t*)xy;
105     for (int i = count; i > 0; --i) {
106         *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
107         fx += dx;
108     }
109 }
110 
AFFINE_NOFILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)111 static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s,
112                                  uint32_t xy[], int count, int x, int y) {
113     SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
114     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
115                              SkMatrix::kScale_Mask |
116                              SkMatrix::kAffine_Mask)) == 0);
117 
118     PREAMBLE(s);
119     const SkBitmapProcStateAutoMapper mapper(s, x, y);
120 
121     SkFractionalInt fx = mapper.fractionalIntX();
122     SkFractionalInt fy = mapper.fractionalIntY();
123     SkFractionalInt dx = s.fInvSxFractionalInt;
124     SkFractionalInt dy = s.fInvKyFractionalInt;
125     int maxX = s.fPixmap.width() - 1;
126     int maxY = s.fPixmap.height() - 1;
127 
128     if (count >= 8) {
129         SkFractionalInt dx4 = dx * 4;
130         SkFractionalInt dy4 = dy * 4;
131         SkFractionalInt dx8 = dx * 8;
132         SkFractionalInt dy8 = dy * 8;
133 
134         int32x4_t xbase, ybase;
135         int32x4_t x2base, y2base;
136         int16_t *dst16 = (int16_t *) xy;
137 
138         // now build fx, fx+dx, fx+2dx, fx+3dx
139         xbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
140         xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), xbase, 1);
141         xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), xbase, 2);
142         xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), xbase, 3);
143 
144         // same for fy
145         ybase = vdupq_n_s32(SkFractionalIntToFixed(fy));
146         ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy), ybase, 1);
147         ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy), ybase, 2);
148         ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy+dy), ybase, 3);
149 
150         x2base = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
151         y2base = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy4)));
152 
153         // store & bump
154         do {
155             int16x8x2_t hi16;
156 
157             hi16.val[0] = TILEX_PROCF_NEON8(xbase, x2base, maxX);
158             hi16.val[1] = TILEY_PROCF_NEON8(ybase, y2base, maxY);
159 
160             vst2q_s16(dst16, hi16);
161 
162             // moving base and on to the next
163             xbase = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
164             ybase = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
165             x2base = vaddq_s32(x2base, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
166             y2base = vaddq_s32(y2base, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
167 
168             dst16 += 16; // 8x32 aka 16x16
169             count -= 8;
170             fx += dx8;
171             fy += dy8;
172         } while (count >= 8);
173         xy = (uint32_t *) dst16;
174     }
175 
176     for (int i = count; i > 0; --i) {
177         *xy++ = (TILEY_PROCF(SkFractionalIntToFixed(fy), maxY) << 16) |
178                  TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
179         fx += dx; fy += dy;
180     }
181 }
182 
PERSP_NOFILTER_NAME(const SkBitmapProcState & s,uint32_t * SK_RESTRICT xy,int count,int x,int y)183 static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s,
184                                 uint32_t* SK_RESTRICT xy,
185                                 int count, int x, int y) {
186     SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);
187 
188     PREAMBLE(s);
189     // max{X,Y} are int here, but later shown/assumed to fit in 16 bits
190     int maxX = s.fPixmap.width() - 1;
191     int maxY = s.fPixmap.height() - 1;
192 
193     SkPerspIter iter(s.fInvMatrix,
194                      SkIntToScalar(x) + SK_ScalarHalf,
195                      SkIntToScalar(y) + SK_ScalarHalf, count);
196 
197     while ((count = iter.next()) != 0) {
198         const SkFixed* SK_RESTRICT srcXY = iter.getXY();
199 
200         if (count >= 8) {
201             int32_t *mysrc = (int32_t *) srcXY;
202             int16_t *mydst = (int16_t *) xy;
203             do {
204                 int16x8x2_t hi16;
205                 int32x4x2_t xy1, xy2;
206 
207                 xy1 = vld2q_s32(mysrc);
208                 xy2 = vld2q_s32(mysrc+8);
209 
210                 hi16.val[0] = TILEX_PROCF_NEON8(xy1.val[0], xy2.val[0], maxX);
211                 hi16.val[1] = TILEY_PROCF_NEON8(xy1.val[1], xy2.val[1], maxY);
212 
213                 vst2q_s16(mydst, hi16);
214 
215                 count -= 8;  // 8 iterations
216                 mysrc += 16; // 16 longs
217                 mydst += 16; // 16 shorts, aka 8 longs
218             } while (count >= 8);
219             // get xy and srcXY fixed up
220             srcXY = (const SkFixed *) mysrc;
221             xy = (uint32_t *) mydst;
222         }
223 
224         while (--count >= 0) {
225             *xy++ = (TILEY_PROCF(srcXY[1], maxY) << 16) |
226                      TILEX_PROCF(srcXY[0], maxX);
227             srcXY += 2;
228         }
229     }
230 }
231 
PACK_FILTER_Y_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_Y)232 static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max,
233                                           SkFixed one PREAMBLE_PARAM_Y) {
234     unsigned i = TILEY_PROCF(f, max);
235     i = (i << 4) | EXTRACT_LOW_BITS(f, max);
236     return (i << 14) | (TILEY_PROCF((f + one), max));
237 }
238 
PACK_FILTER_X_NAME(SkFixed f,unsigned max,SkFixed one PREAMBLE_PARAM_X)239 static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max,
240                                           SkFixed one PREAMBLE_PARAM_X) {
241     unsigned i = TILEX_PROCF(f, max);
242     i = (i << 4) | EXTRACT_LOW_BITS(f, max);
243     return (i << 14) | (TILEX_PROCF((f + one), max));
244 }
245 
PACK_FILTER_X4_NAME(int32x4_t f,unsigned max,SkFixed one PREAMBLE_PARAM_X)246 static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max,
247                                           SkFixed one PREAMBLE_PARAM_X) {
248     int32x4_t ret, res, wide_one;
249 
250     // Prepare constants
251     wide_one = vdupq_n_s32(one);
252 
253     // Step 1
254     res = TILEX_PROCF_NEON4(f, max);
255 
256     // Step 2
257     ret = EXTRACT_LOW_BITS_NEON4(f, max);
258     ret = vsliq_n_s32(ret, res, 4);
259 
260     // Step 3
261     res = TILEX_PROCF_NEON4(f + wide_one, max);
262     ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
263 
264     return ret;
265 }
266 
PACK_FILTER_Y4_NAME(int32x4_t f,unsigned max,SkFixed one PREAMBLE_PARAM_X)267 static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max,
268                                           SkFixed one PREAMBLE_PARAM_X) {
269     int32x4_t ret, res, wide_one;
270 
271     // Prepare constants
272     wide_one = vdupq_n_s32(one);
273 
274     // Step 1
275     res = TILEY_PROCF_NEON4(f, max);
276 
277     // Step 2
278     ret = EXTRACT_LOW_BITS_NEON4(f, max);
279     ret = vsliq_n_s32(ret, res, 4);
280 
281     // Step 3
282     res = TILEY_PROCF_NEON4(f + wide_one, max);
283     ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
284 
285     return ret;
286 }
287 
SCALE_FILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)288 static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
289                               uint32_t xy[], int count, int x, int y) {
290     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
291                              SkMatrix::kScale_Mask)) == 0);
292     SkASSERT(s.fInvKy == 0);
293 
294     PREAMBLE(s);
295 
296     const unsigned maxX = s.fPixmap.width() - 1;
297     const SkFixed one = s.fFilterOneX;
298     const SkFractionalInt dx = s.fInvSxFractionalInt;
299     SkFractionalInt fx;
300 
301     {
302         const SkBitmapProcStateAutoMapper mapper(s, x, y);
303         const SkFixed fy = mapper.fixedY();
304         const unsigned maxY = s.fPixmap.height() - 1;
305         // compute our two Y values up front
306         *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y);
307         // now initialize fx
308         fx = mapper.fractionalIntX();
309     }
310 
311 #ifdef CHECK_FOR_DECAL
312     // test if we don't need to apply the tile proc
313     const SkFixed fixedFx = SkFractionalIntToFixed(fx);
314     const SkFixed fixedDx = SkFractionalIntToFixed(dx);
315     if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) {
316         decal_filter_scale_neon(xy, fixedFx, fixedDx, count);
317         return;
318     }
319 #endif
320     {
321 
322     if (count >= 4) {
323         int32x4_t wide_fx;
324 
325         wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx));
326         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1);
327         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2);
328         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3);
329 
330         while (count >= 4) {
331             int32x4_t res;
332 
333             res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X);
334 
335             vst1q_u32(xy, vreinterpretq_u32_s32(res));
336 
337             wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx));
338             fx += dx+dx+dx+dx;
339             xy += 4;
340             count -= 4;
341         }
342     }
343 
344     while (--count >= 0) {
345         *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X);
346         fx += dx;
347     }
348 
349     }
350 }
351 
AFFINE_FILTER_NAME(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)352 static void AFFINE_FILTER_NAME(const SkBitmapProcState& s,
353                                uint32_t xy[], int count, int x, int y) {
354     SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
355     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
356                              SkMatrix::kScale_Mask |
357                              SkMatrix::kAffine_Mask)) == 0);
358 
359     PREAMBLE(s);
360     const SkBitmapProcStateAutoMapper mapper(s, x, y);
361 
362     SkFixed oneX = s.fFilterOneX;
363     SkFixed oneY = s.fFilterOneY;
364     SkFixed fx = mapper.fixedX();
365     SkFixed fy = mapper.fixedY();
366     SkFixed dx = s.fInvSx;
367     SkFixed dy = s.fInvKy;
368     unsigned maxX = s.fPixmap.width() - 1;
369     unsigned maxY = s.fPixmap.height() - 1;
370 
371     if (count >= 4) {
372         int32x4_t wide_fy, wide_fx;
373 
374         wide_fx = vdupq_n_s32(fx);
375         wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
376         wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
377         wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
378 
379         wide_fy = vdupq_n_s32(fy);
380         wide_fy = vsetq_lane_s32(fy+dy, wide_fy, 1);
381         wide_fy = vsetq_lane_s32(fy+dy+dy, wide_fy, 2);
382         wide_fy = vsetq_lane_s32(fy+dy+dy+dy, wide_fy, 3);
383 
384         while (count >= 4) {
385             int32x4x2_t vxy;
386 
387             // do the X side, then the Y side, then interleave them
388             vxy.val[0] = PACK_FILTER_Y4_NAME(wide_fy, maxY, oneY PREAMBLE_ARG_Y);
389             vxy.val[1] = PACK_FILTER_X4_NAME(wide_fx, maxX, oneX PREAMBLE_ARG_X);
390 
391             // interleave as YXYXYXYX as part of the storing
392             vst2q_s32((int32_t*)xy, vxy);
393 
394             // prepare next iteration
395             wide_fx += vdupq_n_s32(dx+dx+dx+dx);
396             fx += dx + dx + dx + dx;
397             wide_fy += vdupq_n_s32(dy+dy+dy+dy);
398             fy += dy+dy+dy+dy;
399             xy += 8; // 4 x's, 4 y's
400             count -= 4;
401         }
402     }
403 
404     while (--count >= 0) {
405         // NB: writing Y/X
406         *xy++ = PACK_FILTER_Y_NAME(fy, maxY, oneY PREAMBLE_ARG_Y);
407         fy += dy;
408         *xy++ = PACK_FILTER_X_NAME(fx, maxX, oneX PREAMBLE_ARG_X);
409         fx += dx;
410     }
411 }
412 
PERSP_FILTER_NAME(const SkBitmapProcState & s,uint32_t * SK_RESTRICT xy,int count,int x,int y)413 static void PERSP_FILTER_NAME(const SkBitmapProcState& s,
414                               uint32_t* SK_RESTRICT xy, int count,
415                               int x, int y) {
416     SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);
417 
418     PREAMBLE(s);
419     unsigned maxX = s.fPixmap.width() - 1;
420     unsigned maxY = s.fPixmap.height() - 1;
421     SkFixed oneX = s.fFilterOneX;
422     SkFixed oneY = s.fFilterOneY;
423 
424     SkPerspIter iter(s.fInvMatrix,
425                      SkIntToScalar(x) + SK_ScalarHalf,
426                      SkIntToScalar(y) + SK_ScalarHalf, count);
427 
428     while ((count = iter.next()) != 0) {
429         const SkFixed* SK_RESTRICT srcXY = iter.getXY();
430 
431         while (count >= 4) {
432             int32x4_t wide_x, wide_y;
433             int32x4x2_t vxy, vresyx;
434 
435             // load src:  x-y-x-y-x-y-x-y
436             vxy = vld2q_s32(srcXY);
437 
438             // do the X side, then the Y side, then interleave them
439             wide_x = vsubq_s32(vxy.val[0], vdupq_n_s32(oneX>>1));
440             wide_y = vsubq_s32(vxy.val[1], vdupq_n_s32(oneY>>1));
441 
442             vresyx.val[0] = PACK_FILTER_Y4_NAME(wide_y, maxY, oneY PREAMBLE_ARG_Y);
443             vresyx.val[1] = PACK_FILTER_X4_NAME(wide_x, maxX, oneX PREAMBLE_ARG_X);
444 
445             // store interleaved as y-x-y-x-y-x-y-x (NB != read order)
446             vst2q_s32((int32_t*)xy, vresyx);
447 
448             // on to the next iteration
449             srcXY += 2*4;
450             count -= 4;
451             xy += 2*4;
452         }
453 
454         while (--count >= 0) {
455             // NB: we read x/y, we write y/x
456             *xy++ = PACK_FILTER_Y_NAME(srcXY[1] - (oneY >> 1), maxY,
457                                        oneY PREAMBLE_ARG_Y);
458             *xy++ = PACK_FILTER_X_NAME(srcXY[0] - (oneX >> 1), maxX,
459                                        oneX PREAMBLE_ARG_X);
460             srcXY += 2;
461         }
462     }
463 }
464 
465 const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
466     SCALE_NOFILTER_NAME,
467     SCALE_FILTER_NAME,
468     AFFINE_NOFILTER_NAME,
469     AFFINE_FILTER_NAME,
470     PERSP_NOFILTER_NAME,
471     PERSP_FILTER_NAME
472 };
473 
// Tear-down: remove every macro this template consumed or defined.

// Vector tile / low-bit macros used by the procs above.
#undef TILEX_PROCF_NEON8
#undef TILEY_PROCF_NEON8
#undef TILEX_PROCF_NEON4
#undef TILEY_PROCF_NEON4
#undef EXTRACT_LOW_BITS_NEON4

// Scalar macros used by the procs above.
#undef MAKENAME
#undef TILEX_PROCF
#undef TILEY_PROCF
// #undef on a name that is not defined is a no-op, so no #ifdef is needed.
#undef CHECK_FOR_DECAL

// Proc name macros defined at the top of this file.
#undef SCALE_NOFILTER_NAME
#undef SCALE_FILTER_NAME
#undef AFFINE_NOFILTER_NAME
#undef AFFINE_FILTER_NAME
#undef PERSP_NOFILTER_NAME
#undef PERSP_FILTER_NAME

// Helper name macros defined at the top of this file; previously these were
// left defined after the template finished.
#undef PACK_FILTER_X_NAME
#undef PACK_FILTER_Y_NAME
#undef PACK_FILTER_X4_NAME
#undef PACK_FILTER_Y4_NAME

// Optional hooks (defaulted at the top of this file when not supplied).
#undef PREAMBLE
#undef PREAMBLE_PARAM_X
#undef PREAMBLE_PARAM_Y
#undef PREAMBLE_ARG_X
#undef PREAMBLE_ARG_Y

#undef EXTRACT_LOW_BITS
501