1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
2 *
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7 #include "SkBitmapProcState.h"
8 #include "SkPerspIter.h"
9 #include "SkShader.h"
10 #include "SkUtils.h"
11 #include "SkUtilsArm.h"
12 #include "SkBitmapProcState_utils.h"
13
14 /* returns 0...(n-1) given any x (positive or negative).
15
16 As an example, if n (which is always positive) is 5...
17
18 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8
19 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3
20 */
sk_int_mod(int x,int n)21 static inline int sk_int_mod(int x, int n) {
22 SkASSERT(n > 0);
23 if ((unsigned)x >= (unsigned)n) {
24 if (x < 0) {
25 x = n + ~(~x % n);
26 } else {
27 x = x % n;
28 }
29 }
30 return x;
31 }
32
33 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
34 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
35
36 #include "SkBitmapProcState_matrix_template.h"
37
38 ///////////////////////////////////////////////////////////////////////////////
39
40 // Compile neon code paths if needed
41 #if !SK_ARM_NEON_IS_NONE
42
43 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
44 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
45 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
46
47 #endif // !SK_ARM_NEON_IS_NONE
48
49 // Compile non-neon code path if needed
50 #if !SK_ARM_NEON_IS_ALWAYS
51 #define MAKENAME(suffix) ClampX_ClampY ## suffix
52 #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
53 #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
54 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
55 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
56 #define CHECK_FOR_DECAL
57 #include "SkBitmapProcState_matrix.h"
58
59 struct ClampTileProcs {
XClampTileProcs60 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
61 return SkClampMax(fx >> 16, max);
62 }
YClampTileProcs63 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
64 return SkClampMax(fy >> 16, max);
65 }
66 };
67
68 // Referenced in opts_check_x86.cpp
ClampX_ClampY_nofilter_scale(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)69 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
70 int count, int x, int y) {
71 return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
72 }
ClampX_ClampY_nofilter_affine(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)73 void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
74 int count, int x, int y) {
75 return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
76 }
77
78 static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
79 // only clamp lives in the right coord space to check for decal
80 ClampX_ClampY_nofilter_scale,
81 ClampX_ClampY_filter_scale,
82 ClampX_ClampY_nofilter_affine,
83 ClampX_ClampY_filter_affine,
84 NoFilterProc_Persp<ClampTileProcs>,
85 ClampX_ClampY_filter_persp
86 };
87
88 #define MAKENAME(suffix) RepeatX_RepeatY ## suffix
89 #define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
90 #define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
91 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
92 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
93 #include "SkBitmapProcState_matrix.h"
94
95 struct RepeatTileProcs {
XRepeatTileProcs96 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
97 return SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1));
98 }
YRepeatTileProcs99 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
100 return SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1));
101 }
102 };
103
104 static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
105 NoFilterProc_Scale<RepeatTileProcs, false>,
106 RepeatX_RepeatY_filter_scale,
107 NoFilterProc_Affine<RepeatTileProcs>,
108 RepeatX_RepeatY_filter_affine,
109 NoFilterProc_Persp<RepeatTileProcs>,
110 RepeatX_RepeatY_filter_persp
111 };
112 #endif
113
114 #define MAKENAME(suffix) GeneralXY ## suffix
115 #define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
116 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
117 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
118 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
119 #define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
120 #define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
121 #define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX
122 #define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY
123 #define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1))
124 #define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1))
125 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
126 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
127 #include "SkBitmapProcState_matrix.h"
128
129 struct GeneralTileProcs {
XGeneralTileProcs130 static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
131 return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
132 }
YGeneralTileProcs133 static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
134 return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
135 }
136 };
137
138 static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
139 NoFilterProc_Scale<GeneralTileProcs, false>,
140 GeneralXY_filter_scale,
141 NoFilterProc_Affine<GeneralTileProcs>,
142 GeneralXY_filter_affine,
143 NoFilterProc_Persp<GeneralTileProcs>,
144 GeneralXY_filter_persp
145 };
146
147 ///////////////////////////////////////////////////////////////////////////////
148
fixed_clamp(SkFixed x)149 static inline U16CPU fixed_clamp(SkFixed x) {
150 if (x < 0) {
151 x = 0;
152 }
153 if (x >> 16) {
154 x = 0xFFFF;
155 }
156 return x;
157 }
158
fixed_repeat(SkFixed x)159 static inline U16CPU fixed_repeat(SkFixed x) {
160 return x & 0xFFFF;
161 }
162
163 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
164 // See http://code.google.com/p/skia/issues/detail?id=472
165 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
166 #pragma optimize("", off)
167 #endif
168
fixed_mirror(SkFixed x)169 static inline U16CPU fixed_mirror(SkFixed x) {
170 SkFixed s = x << 15 >> 31;
171 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
172 return (x ^ s) & 0xFFFF;
173 }
174
175 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
176 #pragma optimize("", on)
177 #endif
178
choose_tile_proc(unsigned m)179 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
180 if (SkShader::kClamp_TileMode == m) {
181 return fixed_clamp;
182 }
183 if (SkShader::kRepeat_TileMode == m) {
184 return fixed_repeat;
185 }
186 SkASSERT(SkShader::kMirror_TileMode == m);
187 return fixed_mirror;
188 }
189
fixed_clamp_lowbits(SkFixed x,int)190 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
191 return (x >> 12) & 0xF;
192 }
193
fixed_repeat_or_mirrow_lowbits(SkFixed x,int scale)194 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
195 return ((x * scale) >> 12) & 0xF;
196 }
197
choose_tile_lowbits_proc(unsigned m)198 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
199 if (SkShader::kClamp_TileMode == m) {
200 return fixed_clamp_lowbits;
201 } else {
202 SkASSERT(SkShader::kMirror_TileMode == m ||
203 SkShader::kRepeat_TileMode == m);
204 // mirror and repeat have the same behavior for the low bits.
205 return fixed_repeat_or_mirrow_lowbits;
206 }
207 }
208
int_clamp(int x,int n)209 static inline U16CPU int_clamp(int x, int n) {
210 if (x >= n) {
211 x = n - 1;
212 }
213 if (x < 0) {
214 x = 0;
215 }
216 return x;
217 }
218
int_repeat(int x,int n)219 static inline U16CPU int_repeat(int x, int n) {
220 return sk_int_mod(x, n);
221 }
222
int_mirror(int x,int n)223 static inline U16CPU int_mirror(int x, int n) {
224 x = sk_int_mod(x, 2 * n);
225 if (x >= n) {
226 x = n + ~(x - n);
227 }
228 return x;
229 }
230
#if 0
// Debug helper (compiled out): prints int_mirror(i, 3) for i in [-8, 8].
static void test_int_tileprocs() {
    int i = -8;
    while (i <= 8) {
        SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
        ++i;
    }
}
#endif
238
choose_int_tile_proc(unsigned tm)239 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
240 if (SkShader::kClamp_TileMode == tm)
241 return int_clamp;
242 if (SkShader::kRepeat_TileMode == tm)
243 return int_repeat;
244 SkASSERT(SkShader::kMirror_TileMode == tm);
245 return int_mirror;
246 }
247
248 //////////////////////////////////////////////////////////////////////////////
249
decal_nofilter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)250 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
251 int i;
252
253 for (i = (count >> 2); i > 0; --i) {
254 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
255 fx += dx+dx;
256 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
257 fx += dx+dx;
258 }
259 count &= 3;
260
261 uint16_t* xx = (uint16_t*)dst;
262 for (i = count; i > 0; --i) {
263 *xx++ = SkToU16(fx >> 16); fx += dx;
264 }
265 }
266
decal_filter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)267 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
268 if (count & 1) {
269 SkASSERT((fx >> (16 + 14)) == 0);
270 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
271 fx += dx;
272 }
273 while ((count -= 2) >= 0) {
274 SkASSERT((fx >> (16 + 14)) == 0);
275 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
276 fx += dx;
277
278 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
279 fx += dx;
280 }
281 }
282
283 ///////////////////////////////////////////////////////////////////////////////
284 // stores the same as SCALE, but is cheaper to compute. Also since there is no
285 // scale, we don't need/have a FILTER version
286
fill_sequential(uint16_t xptr[],int start,int count)287 static void fill_sequential(uint16_t xptr[], int start, int count) {
288 #if 1
289 if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
290 *xptr++ = start++;
291 count -= 1;
292 }
293 if (count > 3) {
294 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
295 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
296 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
297 start += count & ~3;
298 int qcount = count >> 2;
299 do {
300 *xxptr++ = pattern0;
301 pattern0 += 0x40004;
302 *xxptr++ = pattern1;
303 pattern1 += 0x40004;
304 } while (--qcount != 0);
305 xptr = reinterpret_cast<uint16_t*>(xxptr);
306 count &= 3;
307 }
308 while (--count >= 0) {
309 *xptr++ = start++;
310 }
311 #else
312 for (int i = 0; i < count; i++) {
313 *xptr++ = start++;
314 }
315 #endif
316 }
317
nofilter_trans_preamble(const SkBitmapProcState & s,uint32_t ** xy,int x,int y)318 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
319 int x, int y) {
320 SkPoint pt;
321 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
322 SkIntToScalar(y) + SK_ScalarHalf, &pt);
323 **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
324 s.fBitmap->height());
325 *xy += 1; // bump the ptr
326 // return our starting X position
327 return SkScalarToFixed(pt.fX) >> 16;
328 }
329
clampx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)330 static void clampx_nofilter_trans(const SkBitmapProcState& s,
331 uint32_t xy[], int count, int x, int y) {
332 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
333
334 int xpos = nofilter_trans_preamble(s, &xy, x, y);
335 const int width = s.fBitmap->width();
336 if (1 == width) {
337 // all of the following X values must be 0
338 memset(xy, 0, count * sizeof(uint16_t));
339 return;
340 }
341
342 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
343 int n;
344
345 // fill before 0 as needed
346 if (xpos < 0) {
347 n = -xpos;
348 if (n > count) {
349 n = count;
350 }
351 memset(xptr, 0, n * sizeof(uint16_t));
352 count -= n;
353 if (0 == count) {
354 return;
355 }
356 xptr += n;
357 xpos = 0;
358 }
359
360 // fill in 0..width-1 if needed
361 if (xpos < width) {
362 n = width - xpos;
363 if (n > count) {
364 n = count;
365 }
366 fill_sequential(xptr, xpos, n);
367 count -= n;
368 if (0 == count) {
369 return;
370 }
371 xptr += n;
372 }
373
374 // fill the remaining with the max value
375 sk_memset16(xptr, width - 1, count);
376 }
377
repeatx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)378 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
379 uint32_t xy[], int count, int x, int y) {
380 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
381
382 int xpos = nofilter_trans_preamble(s, &xy, x, y);
383 const int width = s.fBitmap->width();
384 if (1 == width) {
385 // all of the following X values must be 0
386 memset(xy, 0, count * sizeof(uint16_t));
387 return;
388 }
389
390 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
391 int start = sk_int_mod(xpos, width);
392 int n = width - start;
393 if (n > count) {
394 n = count;
395 }
396 fill_sequential(xptr, start, n);
397 xptr += n;
398 count -= n;
399
400 while (count >= width) {
401 fill_sequential(xptr, 0, width);
402 xptr += width;
403 count -= width;
404 }
405
406 if (count > 0) {
407 fill_sequential(xptr, 0, count);
408 }
409 }
410
fill_backwards(uint16_t xptr[],int pos,int count)411 static void fill_backwards(uint16_t xptr[], int pos, int count) {
412 for (int i = 0; i < count; i++) {
413 SkASSERT(pos >= 0);
414 xptr[i] = pos--;
415 }
416 }
417
mirrorx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)418 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
419 uint32_t xy[], int count, int x, int y) {
420 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
421
422 int xpos = nofilter_trans_preamble(s, &xy, x, y);
423 const int width = s.fBitmap->width();
424 if (1 == width) {
425 // all of the following X values must be 0
426 memset(xy, 0, count * sizeof(uint16_t));
427 return;
428 }
429
430 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
431 // need to know our start, and our initial phase (forward or backward)
432 bool forward;
433 int n;
434 int start = sk_int_mod(xpos, 2 * width);
435 if (start >= width) {
436 start = width + ~(start - width);
437 forward = false;
438 n = start + 1; // [start .. 0]
439 } else {
440 forward = true;
441 n = width - start; // [start .. width)
442 }
443 if (n > count) {
444 n = count;
445 }
446 if (forward) {
447 fill_sequential(xptr, start, n);
448 } else {
449 fill_backwards(xptr, start, n);
450 }
451 forward = !forward;
452 xptr += n;
453 count -= n;
454
455 while (count >= width) {
456 if (forward) {
457 fill_sequential(xptr, 0, width);
458 } else {
459 fill_backwards(xptr, width - 1, width);
460 }
461 forward = !forward;
462 xptr += width;
463 count -= width;
464 }
465
466 if (count > 0) {
467 if (forward) {
468 fill_sequential(xptr, 0, count);
469 } else {
470 fill_backwards(xptr, width - 1, count);
471 }
472 }
473 }
474
475 ///////////////////////////////////////////////////////////////////////////////
476
chooseMatrixProc(bool trivial_matrix)477 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
478 // test_int_tileprocs();
479 // check for our special case when there is no scale/affine/perspective
480 if (trivial_matrix && kNone_SkFilterQuality == fFilterLevel) {
481 fIntTileProcY = choose_int_tile_proc(fTileModeY);
482 switch (fTileModeX) {
483 case SkShader::kClamp_TileMode:
484 return clampx_nofilter_trans;
485 case SkShader::kRepeat_TileMode:
486 return repeatx_nofilter_trans;
487 case SkShader::kMirror_TileMode:
488 return mirrorx_nofilter_trans;
489 }
490 }
491
492 int index = 0;
493 if (fFilterLevel != kNone_SkFilterQuality) {
494 index = 1;
495 }
496 if (fInvType & SkMatrix::kPerspective_Mask) {
497 index += 4;
498 } else if (fInvType & SkMatrix::kAffine_Mask) {
499 index += 2;
500 }
501
502 if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
503 // clamp gets special version of filterOne
504 fFilterOneX = SK_Fixed1;
505 fFilterOneY = SK_Fixed1;
506 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
507 }
508
509 // all remaining procs use this form for filterOne
510 fFilterOneX = SK_Fixed1 / fBitmap->width();
511 fFilterOneY = SK_Fixed1 / fBitmap->height();
512
513 if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
514 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
515 }
516
517 fTileProcX = choose_tile_proc(fTileModeX);
518 fTileProcY = choose_tile_proc(fTileModeY);
519 fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
520 fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
521 return GeneralXY_Procs[index];
522 }
523