1 /*
2  * Copyright 2008 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 // The copyright below was added in 2009, but I see no record of moto contributions...?
9 
10 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
11  *
12  * Use of this source code is governed by a BSD-style license that can be
13  * found in the LICENSE file.
14  */
15 
16 #include "SkBitmapProcState.h"
17 #include "SkPerspIter.h"
18 #include "SkShader.h"
19 #include "SkUtils.h"
20 #include "SkUtilsArm.h"
21 #include "SkBitmapProcState_utils.h"
22 
23 /*  returns 0...(n-1) given any x (positive or negative).
24 
25     As an example, if n (which is always positive) is 5...
26 
27           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
28     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
29  */
sk_int_mod(int x,int n)30 static inline int sk_int_mod(int x, int n) {
31     SkASSERT(n > 0);
32     if ((unsigned)x >= (unsigned)n) {
33         if (x < 0) {
34             x = n + ~(~x % n);
35         } else {
36             x = x % n;
37         }
38     }
39     return x;
40 }
41 
// Forward declarations; both functions are defined further down in this file.
// NOTE(review): presumably declared here so the template header included just
// below can call them — confirm against SkBitmapProcState_matrix_template.h.
42 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
43 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
44 
45 #include "SkBitmapProcState_matrix_template.h"
46 
47 ///////////////////////////////////////////////////////////////////////////////
48 
49 // Compile neon code paths if needed
50 #if !SK_ARM_NEON_IS_NONE
51 
52 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
53 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
54 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
55 
56 #endif // !SK_ARM_NEON_IS_NONE
57 
58 // Compile non-neon code path if needed
59 #if !SK_ARM_NEON_IS_ALWAYS
// Macro parameter set for the clamp/clamp instantiation of the header included
// at the end of this group.
//   MAKENAME           - pastes the ClampX_ClampY prefix onto a name suffix
//   TILE{X,Y}_PROCF    - (value >> 16) passed through SkClampMax with max
//   TILE{X,Y}_LOW_BITS - bits 12..15 of the value (max is not used)
//   CHECK_FOR_DECAL    - defined here; the Repeat and General parameter sets
//                        later in this file do not define it
// NOTE(review): the included header presumably consumes and then #undefs these
// macros (they are redefined later without an #undef) — confirm in
// SkBitmapProcState_matrix.h.
60 #define MAKENAME(suffix)        ClampX_ClampY ## suffix
61 #define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
62 #define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
63 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
64 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
65 #define CHECK_FOR_DECAL
66 #include "SkBitmapProcState_matrix.h"
67 
68 struct ClampTileProcs {
XClampTileProcs69     static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
70         return SkClampMax(fx >> 16, max);
71     }
YClampTileProcs72     static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
73         return SkClampMax(fy >> 16, max);
74     }
75 };
76 
77 // Referenced in opts_check_x86.cpp
ClampX_ClampY_nofilter_scale(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)78 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
79                                   int count, int x, int y) {
80     return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
81 }
ClampX_ClampY_nofilter_affine(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)82 void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
83                                   int count, int x, int y) {
84     return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
85 }
86 
87 static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
88     // only clamp lives in the right coord space to check for decal
89     ClampX_ClampY_nofilter_scale,
90     ClampX_ClampY_filter_scale,
91     ClampX_ClampY_nofilter_affine,
92     ClampX_ClampY_filter_affine,
93     NoFilterProc_Persp<ClampTileProcs>,
94     ClampX_ClampY_filter_persp
95 };
96 
// Macro parameter set for the repeat/repeat instantiation of the header
// included at the end of this group.
//   MAKENAME           - pastes the RepeatX_RepeatY prefix onto a name suffix
//   TILE{X,Y}_PROCF    - keeps the low 16 bits of the value, multiplies them by
//                        (max + 1) as unsigned, then applies SK_USHIFT16
//   TILE{X,Y}_LOW_BITS - bits 12..15 of that same unsigned product
97 #define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
98 #define TILEX_PROCF(fx, max)    SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1))
99 #define TILEY_PROCF(fy, max)    SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1))
100 #define TILEX_LOW_BITS(fx, max) (((unsigned)((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
101 #define TILEY_LOW_BITS(fy, max) (((unsigned)((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
102 #include "SkBitmapProcState_matrix.h"
103 
104 struct RepeatTileProcs {
XRepeatTileProcs105     static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
106         SkASSERT(max < 65535);
107         return SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1));
108     }
YRepeatTileProcs109     static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
110         SkASSERT(max < 65535);
111         return SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1));
112     }
113 };
114 
115 static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
116     NoFilterProc_Scale<RepeatTileProcs, false>,
117     RepeatX_RepeatY_filter_scale,
118     NoFilterProc_Affine<RepeatTileProcs>,
119     RepeatX_RepeatY_filter_affine,
120     NoFilterProc_Persp<RepeatTileProcs>,
121     RepeatX_RepeatY_filter_persp
122 };
123 #endif
124 
// Macro parameter set for the GeneralXY instantiation of the header included
// at the end of this group. Tiling goes through function pointers read from
// the state instead of being expanded inline.
//   MAKENAME           - pastes the GeneralXY prefix onto a name suffix
//   PREAMBLE           - copies fTileProcX/Y and fTileLowBitsProcX/Y from the
//                        state into locals; each (void) cast references the
//                        local once
//   PREAMBLE_PARAM_X/Y - parameter-list fragment declaring one axis's two
//                        function pointers
//   PREAMBLE_ARG_X/Y   - matching argument-list fragment
//   TILE{X,Y}_PROCF    - SK_USHIFT16(tileProc(value) * (max + 1))
//   TILE{X,Y}_LOW_BITS - tileLowBitsProc(value, max + 1)
// NOTE(review): how the included header uses the PREAMBLE* macros is not
// visible here — confirm in SkBitmapProcState_matrix.h.
125 #define MAKENAME(suffix)        GeneralXY ## suffix
126 #define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
127                                 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
128                                 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
129                                 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
130 #define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
131 #define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
132 #define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
133 #define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
134 #define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
135 #define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
136 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
137 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
138 #include "SkBitmapProcState_matrix.h"
139 
140 struct GeneralTileProcs {
XGeneralTileProcs141     static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
142         return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
143     }
YGeneralTileProcs144     static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
145         return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
146     }
147 };
148 
149 static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
150     NoFilterProc_Scale<GeneralTileProcs, false>,
151     GeneralXY_filter_scale,
152     NoFilterProc_Affine<GeneralTileProcs>,
153     GeneralXY_filter_affine,
154     NoFilterProc_Persp<GeneralTileProcs>,
155     GeneralXY_filter_persp
156 };
157 
158 ///////////////////////////////////////////////////////////////////////////////
159 
fixed_clamp(SkFixed x)160 static inline U16CPU fixed_clamp(SkFixed x) {
161     if (x < 0) {
162         x = 0;
163     }
164     if (x >> 16) {
165         x = 0xFFFF;
166     }
167     return x;
168 }
169 
fixed_repeat(SkFixed x)170 static inline U16CPU fixed_repeat(SkFixed x) {
171     return x & 0xFFFF;
172 }
173 
174 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
175 // See http://code.google.com/p/skia/issues/detail?id=472
176 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
177 #pragma optimize("", off)
178 #endif
179 
fixed_mirror(SkFixed x)180 static inline U16CPU fixed_mirror(SkFixed x) {
181     SkFixed s = SkLeftShift(x, 15) >> 31;
182     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
183     return (x ^ s) & 0xFFFF;
184 }
185 
186 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
187 #pragma optimize("", on)
188 #endif
189 
choose_tile_proc(unsigned m)190 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
191     if (SkShader::kClamp_TileMode == m) {
192         return fixed_clamp;
193     }
194     if (SkShader::kRepeat_TileMode == m) {
195         return fixed_repeat;
196     }
197     SkASSERT(SkShader::kMirror_TileMode == m);
198     return fixed_mirror;
199 }
200 
fixed_clamp_lowbits(SkFixed x,int)201 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
202     return (x >> 12) & 0xF;
203 }
204 
fixed_repeat_or_mirrow_lowbits(SkFixed x,int scale)205 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
206     return ((x * scale) >> 12) & 0xF;
207 }
208 
choose_tile_lowbits_proc(unsigned m)209 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
210     if (SkShader::kClamp_TileMode == m) {
211         return fixed_clamp_lowbits;
212     } else {
213         SkASSERT(SkShader::kMirror_TileMode == m ||
214                  SkShader::kRepeat_TileMode == m);
215         // mirror and repeat have the same behavior for the low bits.
216         return fixed_repeat_or_mirrow_lowbits;
217     }
218 }
219 
int_clamp(int x,int n)220 static inline U16CPU int_clamp(int x, int n) {
221     if (x >= n) {
222         x = n - 1;
223     }
224     if (x < 0) {
225         x = 0;
226     }
227     return x;
228 }
229 
int_repeat(int x,int n)230 static inline U16CPU int_repeat(int x, int n) {
231     return sk_int_mod(x, n);
232 }
233 
int_mirror(int x,int n)234 static inline U16CPU int_mirror(int x, int n) {
235     x = sk_int_mod(x, 2 * n);
236     if (x >= n) {
237         x = n + ~(x - n);
238     }
239     return x;
240 }
241 
242 #if 0
243 static void test_int_tileprocs() {
244     for (int i = -8; i <= 8; i++) {
245         SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
246     }
247 }
248 #endif
249 
choose_int_tile_proc(unsigned tm)250 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
251     if (SkShader::kClamp_TileMode == tm)
252         return int_clamp;
253     if (SkShader::kRepeat_TileMode == tm)
254         return int_repeat;
255     SkASSERT(SkShader::kMirror_TileMode == tm);
256     return int_mirror;
257 }
258 
259 //////////////////////////////////////////////////////////////////////////////
260 
decal_nofilter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)261 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
262     int i;
263 
264     for (i = (count >> 2); i > 0; --i) {
265         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
266         fx += dx+dx;
267         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
268         fx += dx+dx;
269     }
270     count &= 3;
271 
272     uint16_t* xx = (uint16_t*)dst;
273     for (i = count; i > 0; --i) {
274         *xx++ = SkToU16(fx >> 16); fx += dx;
275     }
276 }
277 
decal_filter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)278 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
279     if (count & 1) {
280         SkASSERT((fx >> (16 + 14)) == 0);
281         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
282         fx += dx;
283     }
284     while ((count -= 2) >= 0) {
285         SkASSERT((fx >> (16 + 14)) == 0);
286         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
287         fx += dx;
288 
289         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
290         fx += dx;
291     }
292 }
293 
294 ///////////////////////////////////////////////////////////////////////////////
295 // stores the same as SCALE, but is cheaper to compute. Also since there is no
296 // scale, we don't need/have a FILTER version
297 
// Writes the ascending run start, start + 1, ... into xptr[0 .. count).
//
//   xptr   destination; must have room for `count` entries
//   start  first value written; each value is truncated to 16 bits
//   count  number of entries; nothing is written when count <= 0
//
// This is deliberately a plain loop. It writes nothing for count == 0 at any
// pointer alignment, and it only stores through uint16_t, so there is no
// type-punned 32-bit access to the buffer.
static void fill_sequential(uint16_t xptr[], int start, int count) {
    for (int i = 0; i < count; ++i) {
        *xptr++ = static_cast<uint16_t>(start++);
    }
}
328 
nofilter_trans_preamble(const SkBitmapProcState & s,uint32_t ** xy,int x,int y)329 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
330                                    int x, int y) {
331     const SkBitmapProcStateAutoMapper mapper(s, x, y);
332     **xy = s.fIntTileProcY(mapper.intY(), s.fPixmap.height());
333     *xy += 1;   // bump the ptr
334     // return our starting X position
335     return mapper.intX();
336 }
337 
clampx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)338 static void clampx_nofilter_trans(const SkBitmapProcState& s,
339                                   uint32_t xy[], int count, int x, int y) {
340     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
341 
342     int xpos = nofilter_trans_preamble(s, &xy, x, y);
343     const int width = s.fPixmap.width();
344     if (1 == width) {
345         // all of the following X values must be 0
346         memset(xy, 0, count * sizeof(uint16_t));
347         return;
348     }
349 
350     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
351     int n;
352 
353     // fill before 0 as needed
354     if (xpos < 0) {
355         n = -xpos;
356         if (n > count) {
357             n = count;
358         }
359         memset(xptr, 0, n * sizeof(uint16_t));
360         count -= n;
361         if (0 == count) {
362             return;
363         }
364         xptr += n;
365         xpos = 0;
366     }
367 
368     // fill in 0..width-1 if needed
369     if (xpos < width) {
370         n = width - xpos;
371         if (n > count) {
372             n = count;
373         }
374         fill_sequential(xptr, xpos, n);
375         count -= n;
376         if (0 == count) {
377             return;
378         }
379         xptr += n;
380     }
381 
382     // fill the remaining with the max value
383     sk_memset16(xptr, width - 1, count);
384 }
385 
repeatx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)386 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
387                                    uint32_t xy[], int count, int x, int y) {
388     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
389 
390     int xpos = nofilter_trans_preamble(s, &xy, x, y);
391     const int width = s.fPixmap.width();
392     if (1 == width) {
393         // all of the following X values must be 0
394         memset(xy, 0, count * sizeof(uint16_t));
395         return;
396     }
397 
398     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
399     int start = sk_int_mod(xpos, width);
400     int n = width - start;
401     if (n > count) {
402         n = count;
403     }
404     fill_sequential(xptr, start, n);
405     xptr += n;
406     count -= n;
407 
408     while (count >= width) {
409         fill_sequential(xptr, 0, width);
410         xptr += width;
411         count -= width;
412     }
413 
414     if (count > 0) {
415         fill_sequential(xptr, 0, count);
416     }
417 }
418 
fill_backwards(uint16_t xptr[],int pos,int count)419 static void fill_backwards(uint16_t xptr[], int pos, int count) {
420     for (int i = 0; i < count; i++) {
421         SkASSERT(pos >= 0);
422         xptr[i] = pos--;
423     }
424 }
425 
mirrorx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)426 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
427                                    uint32_t xy[], int count, int x, int y) {
428     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
429 
430     int xpos = nofilter_trans_preamble(s, &xy, x, y);
431     const int width = s.fPixmap.width();
432     if (1 == width) {
433         // all of the following X values must be 0
434         memset(xy, 0, count * sizeof(uint16_t));
435         return;
436     }
437 
438     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
439     // need to know our start, and our initial phase (forward or backward)
440     bool forward;
441     int n;
442     int start = sk_int_mod(xpos, 2 * width);
443     if (start >= width) {
444         start = width + ~(start - width);
445         forward = false;
446         n = start + 1;  // [start .. 0]
447     } else {
448         forward = true;
449         n = width - start;  // [start .. width)
450     }
451     if (n > count) {
452         n = count;
453     }
454     if (forward) {
455         fill_sequential(xptr, start, n);
456     } else {
457         fill_backwards(xptr, start, n);
458     }
459     forward = !forward;
460     xptr += n;
461     count -= n;
462 
463     while (count >= width) {
464         if (forward) {
465             fill_sequential(xptr, 0, width);
466         } else {
467             fill_backwards(xptr, width - 1, width);
468         }
469         forward = !forward;
470         xptr += width;
471         count -= width;
472     }
473 
474     if (count > 0) {
475         if (forward) {
476             fill_sequential(xptr, 0, count);
477         } else {
478             fill_backwards(xptr, width - 1, count);
479         }
480     }
481 }
482 
483 ///////////////////////////////////////////////////////////////////////////////
484 
chooseMatrixProc(bool trivial_matrix)485 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
486 //    test_int_tileprocs();
487     // check for our special case when there is no scale/affine/perspective
488     if (trivial_matrix && kNone_SkFilterQuality == fFilterLevel) {
489         fIntTileProcY = choose_int_tile_proc(fTileModeY);
490         switch (fTileModeX) {
491             case SkShader::kClamp_TileMode:
492                 return clampx_nofilter_trans;
493             case SkShader::kRepeat_TileMode:
494                 return repeatx_nofilter_trans;
495             case SkShader::kMirror_TileMode:
496                 return mirrorx_nofilter_trans;
497         }
498     }
499 
500     int index = 0;
501     if (fFilterLevel != kNone_SkFilterQuality) {
502         index = 1;
503     }
504     if (fInvType & SkMatrix::kPerspective_Mask) {
505         index += 4;
506     } else if (fInvType & SkMatrix::kAffine_Mask) {
507         index += 2;
508     }
509 
510     if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
511         // clamp gets special version of filterOne
512         fFilterOneX = SK_Fixed1;
513         fFilterOneY = SK_Fixed1;
514         return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
515     }
516 
517     // all remaining procs use this form for filterOne
518     fFilterOneX = SK_Fixed1 / fPixmap.width();
519     fFilterOneY = SK_Fixed1 / fPixmap.height();
520 
521     if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
522         return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
523     }
524 
525     fTileProcX = choose_tile_proc(fTileModeX);
526     fTileProcY = choose_tile_proc(fTileModeY);
527     fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
528     fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
529     return GeneralXY_Procs[index];
530 }
531