1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
2  *
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #include "SkBitmapProcState.h"
8 #include "SkPerspIter.h"
9 #include "SkShader.h"
10 #include "SkUtils.h"
11 #include "SkUtilsArm.h"
12 #include "SkBitmapProcState_utils.h"
13 
14 /*  returns 0...(n-1) given any x (positive or negative).
15 
16     As an example, if n (which is always positive) is 5...
17 
18           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
19     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
20  */
sk_int_mod(int x,int n)21 static inline int sk_int_mod(int x, int n) {
22     SkASSERT(n > 0);
23     if ((unsigned)x >= (unsigned)n) {
24         if (x < 0) {
25             x = n + ~(~x % n);
26         } else {
27             x = x % n;
28         }
29     }
30     return x;
31 }
32 
// Forward declarations of the decal helpers; both are defined further down in
// this file. They are declared ahead of the SkBitmapProcState_matrix*.h
// includes below, presumably because those headers call them -- TODO confirm.
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
35 
36 #include "SkBitmapProcState_matrix_template.h"
37 
38 ///////////////////////////////////////////////////////////////////////////////
39 
40 // Compile neon code paths if needed
41 #if !SK_ARM_NEON_IS_NONE
42 
// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
// NOTE(review): presumably these are the NEON counterparts of the
// ClampX_ClampY_Procs / RepeatX_RepeatY_Procs tables defined in this file, and
// SK_ARM_NEON_WRAP (used in chooseMatrixProc) selects between the two flavors
// -- confirm against SkUtilsArm.h.
extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
46 
47 #endif // !SK_ARM_NEON_IS_NONE
48 
49 // Compile non-neon code path if needed
50 #if !SK_ARM_NEON_IS_ALWAYS
// Macro parameters for the clamp/clamp flavor, consumed by the
// SkBitmapProcState_matrix.h include at the end of this group. The
// ClampX_ClampY_filter_* functions referenced by ClampX_ClampY_Procs are not
// defined in this file, so they presumably come from that include (verify).
//   MAKENAME           - pastes ClampX_ClampY in front of the given suffix
//   TILE{X,Y}_PROCF    - passes the integer part (f >> 16) and max to SkClampMax
//   TILE{X,Y}_LOW_BITS - bits 12..15 of the fixed-point value (max is unused)
//   CHECK_FOR_DECAL    - defined only for this flavor; presumably enables the
//                        decal fast path inside the header (TODO confirm)
// NOTE(review): these macros are redefined for the next flavor without an
// #undef here, so the header is presumably expected to #undef them -- confirm.
#define MAKENAME(suffix)        ClampX_ClampY ## suffix
#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
#define CHECK_FOR_DECAL
#include "SkBitmapProcState_matrix.h"
58 
59 struct ClampTileProcs {
XClampTileProcs60     static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
61         return SkClampMax(fx >> 16, max);
62     }
YClampTileProcs63     static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
64         return SkClampMax(fy >> 16, max);
65     }
66 };
67 
68 // Referenced in opts_check_x86.cpp
ClampX_ClampY_nofilter_scale(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)69 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
70                                   int count, int x, int y) {
71     return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
72 }
ClampX_ClampY_nofilter_affine(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)73 void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
74                                   int count, int x, int y) {
75     return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
76 }
77 
78 static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
79     // only clamp lives in the right coord space to check for decal
80     ClampX_ClampY_nofilter_scale,
81     ClampX_ClampY_filter_scale,
82     ClampX_ClampY_nofilter_affine,
83     ClampX_ClampY_filter_affine,
84     NoFilterProc_Persp<ClampTileProcs>,
85     ClampX_ClampY_filter_persp
86 };
87 
// Macro parameters for the repeat/repeat flavor, consumed by the
// SkBitmapProcState_matrix.h include at the end of this group. The
// RepeatX_RepeatY_filter_* functions referenced by RepeatX_RepeatY_Procs are
// not defined in this file, so they presumably come from that include (verify).
//   MAKENAME           - pastes RepeatX_RepeatY in front of the given suffix
//   TILE{X,Y}_PROCF    - keeps the low 16 bits of f, multiplies by (max + 1)
//                        and passes the product to SK_USHIFT16
//   TILE{X,Y}_LOW_BITS - bits 12..15 of that same product
// CHECK_FOR_DECAL is not defined in this group.
#define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
#define TILEX_PROCF(fx, max)    SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
94 
95 struct RepeatTileProcs {
XRepeatTileProcs96     static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
97         return SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1));
98     }
YRepeatTileProcs99     static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
100         return SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1));
101     }
102 };
103 
104 static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
105     NoFilterProc_Scale<RepeatTileProcs, false>,
106     RepeatX_RepeatY_filter_scale,
107     NoFilterProc_Affine<RepeatTileProcs>,
108     RepeatX_RepeatY_filter_affine,
109     NoFilterProc_Persp<RepeatTileProcs>,
110     RepeatX_RepeatY_filter_persp
111 };
112 #endif
113 
// Macro parameters for the general flavor (any mix of tile modes), consumed by
// the SkBitmapProcState_matrix.h include at the end of this group. Unlike the
// clamp and repeat flavors, tiling goes through function pointers stored in
// the SkBitmapProcState.
//   MAKENAME             - pastes GeneralXY in front of the given suffix
//   PREAMBLE(state)      - copies the four tile procs out of the state into
//                          locals; the (void) casts silence unused warnings
//   PREAMBLE_PARAM_{X,Y} - extra parameter declarations carrying those procs
//   PREAMBLE_ARG_{X,Y}   - the matching extra call arguments
//   TILE{X,Y}_PROCF      - tileProc(f) multiplied by (max + 1), passed to
//                          SK_USHIFT16
//   TILE{X,Y}_LOW_BITS   - tileLowBitsProc(f, max + 1)
// NOTE(review): how the header uses the PREAMBLE* macros is not visible here.
#define MAKENAME(suffix)        GeneralXY ## suffix
#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
#define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
#define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
#define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
#include "SkBitmapProcState_matrix.h"
128 
129 struct GeneralTileProcs {
XGeneralTileProcs130     static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
131         return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
132     }
YGeneralTileProcs133     static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
134         return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
135     }
136 };
137 
138 static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
139     NoFilterProc_Scale<GeneralTileProcs, false>,
140     GeneralXY_filter_scale,
141     NoFilterProc_Affine<GeneralTileProcs>,
142     GeneralXY_filter_affine,
143     NoFilterProc_Persp<GeneralTileProcs>,
144     GeneralXY_filter_persp
145 };
146 
147 ///////////////////////////////////////////////////////////////////////////////
148 
fixed_clamp(SkFixed x)149 static inline U16CPU fixed_clamp(SkFixed x) {
150     if (x < 0) {
151         x = 0;
152     }
153     if (x >> 16) {
154         x = 0xFFFF;
155     }
156     return x;
157 }
158 
fixed_repeat(SkFixed x)159 static inline U16CPU fixed_repeat(SkFixed x) {
160     return x & 0xFFFF;
161 }
162 
163 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
164 // See http://code.google.com/p/skia/issues/detail?id=472
165 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
166 #pragma optimize("", off)
167 #endif
168 
fixed_mirror(SkFixed x)169 static inline U16CPU fixed_mirror(SkFixed x) {
170     SkFixed s = x << 15 >> 31;
171     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
172     return (x ^ s) & 0xFFFF;
173 }
174 
175 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
176 #pragma optimize("", on)
177 #endif
178 
choose_tile_proc(unsigned m)179 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
180     if (SkShader::kClamp_TileMode == m) {
181         return fixed_clamp;
182     }
183     if (SkShader::kRepeat_TileMode == m) {
184         return fixed_repeat;
185     }
186     SkASSERT(SkShader::kMirror_TileMode == m);
187     return fixed_mirror;
188 }
189 
fixed_clamp_lowbits(SkFixed x,int)190 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
191     return (x >> 12) & 0xF;
192 }
193 
fixed_repeat_or_mirrow_lowbits(SkFixed x,int scale)194 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
195     return ((x * scale) >> 12) & 0xF;
196 }
197 
choose_tile_lowbits_proc(unsigned m)198 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
199     if (SkShader::kClamp_TileMode == m) {
200         return fixed_clamp_lowbits;
201     } else {
202         SkASSERT(SkShader::kMirror_TileMode == m ||
203                  SkShader::kRepeat_TileMode == m);
204         // mirror and repeat have the same behavior for the low bits.
205         return fixed_repeat_or_mirrow_lowbits;
206     }
207 }
208 
int_clamp(int x,int n)209 static inline U16CPU int_clamp(int x, int n) {
210     if (x >= n) {
211         x = n - 1;
212     }
213     if (x < 0) {
214         x = 0;
215     }
216     return x;
217 }
218 
int_repeat(int x,int n)219 static inline U16CPU int_repeat(int x, int n) {
220     return sk_int_mod(x, n);
221 }
222 
int_mirror(int x,int n)223 static inline U16CPU int_mirror(int x, int n) {
224     x = sk_int_mod(x, 2 * n);
225     if (x >= n) {
226         x = n + ~(x - n);
227     }
228     return x;
229 }
230 
231 #if 0
232 static void test_int_tileprocs() {
233     for (int i = -8; i <= 8; i++) {
234         SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
235     }
236 }
237 #endif
238 
choose_int_tile_proc(unsigned tm)239 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
240     if (SkShader::kClamp_TileMode == tm)
241         return int_clamp;
242     if (SkShader::kRepeat_TileMode == tm)
243         return int_repeat;
244     SkASSERT(SkShader::kMirror_TileMode == tm);
245     return int_mirror;
246 }
247 
248 //////////////////////////////////////////////////////////////////////////////
249 
decal_nofilter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)250 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
251     int i;
252 
253     for (i = (count >> 2); i > 0; --i) {
254         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
255         fx += dx+dx;
256         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
257         fx += dx+dx;
258     }
259     count &= 3;
260 
261     uint16_t* xx = (uint16_t*)dst;
262     for (i = count; i > 0; --i) {
263         *xx++ = SkToU16(fx >> 16); fx += dx;
264     }
265 }
266 
decal_filter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)267 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
268     if (count & 1) {
269         SkASSERT((fx >> (16 + 14)) == 0);
270         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
271         fx += dx;
272     }
273     while ((count -= 2) >= 0) {
274         SkASSERT((fx >> (16 + 14)) == 0);
275         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
276         fx += dx;
277 
278         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
279         fx += dx;
280     }
281 }
282 
283 ///////////////////////////////////////////////////////////////////////////////
284 // stores the same as SCALE, but is cheaper to compute. Also since there is no
285 // scale, we don't need/have a FILTER version
286 
fill_sequential(uint16_t xptr[],int start,int count)287 static void fill_sequential(uint16_t xptr[], int start, int count) {
288 #if 1
289     if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
290         *xptr++ = start++;
291         count -= 1;
292     }
293     if (count > 3) {
294         uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
295         uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
296         uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
297         start += count & ~3;
298         int qcount = count >> 2;
299         do {
300             *xxptr++ = pattern0;
301             pattern0 += 0x40004;
302             *xxptr++ = pattern1;
303             pattern1 += 0x40004;
304         } while (--qcount != 0);
305         xptr = reinterpret_cast<uint16_t*>(xxptr);
306         count &= 3;
307     }
308     while (--count >= 0) {
309         *xptr++ = start++;
310     }
311 #else
312     for (int i = 0; i < count; i++) {
313         *xptr++ = start++;
314     }
315 #endif
316 }
317 
nofilter_trans_preamble(const SkBitmapProcState & s,uint32_t ** xy,int x,int y)318 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
319                                    int x, int y) {
320     SkPoint pt;
321     s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
322                SkIntToScalar(y) + SK_ScalarHalf, &pt);
323     **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
324                            s.fBitmap->height());
325     *xy += 1;   // bump the ptr
326     // return our starting X position
327     return SkScalarToFixed(pt.fX) >> 16;
328 }
329 
clampx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)330 static void clampx_nofilter_trans(const SkBitmapProcState& s,
331                                   uint32_t xy[], int count, int x, int y) {
332     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
333 
334     int xpos = nofilter_trans_preamble(s, &xy, x, y);
335     const int width = s.fBitmap->width();
336     if (1 == width) {
337         // all of the following X values must be 0
338         memset(xy, 0, count * sizeof(uint16_t));
339         return;
340     }
341 
342     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
343     int n;
344 
345     // fill before 0 as needed
346     if (xpos < 0) {
347         n = -xpos;
348         if (n > count) {
349             n = count;
350         }
351         memset(xptr, 0, n * sizeof(uint16_t));
352         count -= n;
353         if (0 == count) {
354             return;
355         }
356         xptr += n;
357         xpos = 0;
358     }
359 
360     // fill in 0..width-1 if needed
361     if (xpos < width) {
362         n = width - xpos;
363         if (n > count) {
364             n = count;
365         }
366         fill_sequential(xptr, xpos, n);
367         count -= n;
368         if (0 == count) {
369             return;
370         }
371         xptr += n;
372     }
373 
374     // fill the remaining with the max value
375     sk_memset16(xptr, width - 1, count);
376 }
377 
repeatx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)378 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
379                                    uint32_t xy[], int count, int x, int y) {
380     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
381 
382     int xpos = nofilter_trans_preamble(s, &xy, x, y);
383     const int width = s.fBitmap->width();
384     if (1 == width) {
385         // all of the following X values must be 0
386         memset(xy, 0, count * sizeof(uint16_t));
387         return;
388     }
389 
390     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
391     int start = sk_int_mod(xpos, width);
392     int n = width - start;
393     if (n > count) {
394         n = count;
395     }
396     fill_sequential(xptr, start, n);
397     xptr += n;
398     count -= n;
399 
400     while (count >= width) {
401         fill_sequential(xptr, 0, width);
402         xptr += width;
403         count -= width;
404     }
405 
406     if (count > 0) {
407         fill_sequential(xptr, 0, count);
408     }
409 }
410 
fill_backwards(uint16_t xptr[],int pos,int count)411 static void fill_backwards(uint16_t xptr[], int pos, int count) {
412     for (int i = 0; i < count; i++) {
413         SkASSERT(pos >= 0);
414         xptr[i] = pos--;
415     }
416 }
417 
mirrorx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)418 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
419                                    uint32_t xy[], int count, int x, int y) {
420     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
421 
422     int xpos = nofilter_trans_preamble(s, &xy, x, y);
423     const int width = s.fBitmap->width();
424     if (1 == width) {
425         // all of the following X values must be 0
426         memset(xy, 0, count * sizeof(uint16_t));
427         return;
428     }
429 
430     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
431     // need to know our start, and our initial phase (forward or backward)
432     bool forward;
433     int n;
434     int start = sk_int_mod(xpos, 2 * width);
435     if (start >= width) {
436         start = width + ~(start - width);
437         forward = false;
438         n = start + 1;  // [start .. 0]
439     } else {
440         forward = true;
441         n = width - start;  // [start .. width)
442     }
443     if (n > count) {
444         n = count;
445     }
446     if (forward) {
447         fill_sequential(xptr, start, n);
448     } else {
449         fill_backwards(xptr, start, n);
450     }
451     forward = !forward;
452     xptr += n;
453     count -= n;
454 
455     while (count >= width) {
456         if (forward) {
457             fill_sequential(xptr, 0, width);
458         } else {
459             fill_backwards(xptr, width - 1, width);
460         }
461         forward = !forward;
462         xptr += width;
463         count -= width;
464     }
465 
466     if (count > 0) {
467         if (forward) {
468             fill_sequential(xptr, 0, count);
469         } else {
470             fill_backwards(xptr, width - 1, count);
471         }
472     }
473 }
474 
475 ///////////////////////////////////////////////////////////////////////////////
476 
chooseMatrixProc(bool trivial_matrix)477 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
478 //    test_int_tileprocs();
479     // check for our special case when there is no scale/affine/perspective
480     if (trivial_matrix && kNone_SkFilterQuality == fFilterLevel) {
481         fIntTileProcY = choose_int_tile_proc(fTileModeY);
482         switch (fTileModeX) {
483             case SkShader::kClamp_TileMode:
484                 return clampx_nofilter_trans;
485             case SkShader::kRepeat_TileMode:
486                 return repeatx_nofilter_trans;
487             case SkShader::kMirror_TileMode:
488                 return mirrorx_nofilter_trans;
489         }
490     }
491 
492     int index = 0;
493     if (fFilterLevel != kNone_SkFilterQuality) {
494         index = 1;
495     }
496     if (fInvType & SkMatrix::kPerspective_Mask) {
497         index += 4;
498     } else if (fInvType & SkMatrix::kAffine_Mask) {
499         index += 2;
500     }
501 
502     if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
503         // clamp gets special version of filterOne
504         fFilterOneX = SK_Fixed1;
505         fFilterOneY = SK_Fixed1;
506         return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
507     }
508 
509     // all remaining procs use this form for filterOne
510     fFilterOneX = SK_Fixed1 / fBitmap->width();
511     fFilterOneY = SK_Fixed1 / fBitmap->height();
512 
513     if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
514         return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
515     }
516 
517     fTileProcX = choose_tile_proc(fTileModeX);
518     fTileProcY = choose_tile_proc(fTileModeY);
519     fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
520     fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
521     return GeneralXY_Procs[index];
522 }
523