1 /*
2 * Copyright 2008 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
// The copyright notice below was added in 2009, but I see no record of Motorola contributions...?
9
10 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
11 *
12 * Use of this source code is governed by a BSD-style license that can be
13 * found in the LICENSE file.
14 */
15
16 #include "SkBitmapProcState.h"
17 #include "SkPerspIter.h"
18 #include "SkShader.h"
19 #include "SkUtils.h"
20 #include "SkUtilsArm.h"
21 #include "SkBitmapProcState_utils.h"
22
23 /* returns 0...(n-1) given any x (positive or negative).
24
25 As an example, if n (which is always positive) is 5...
26
27 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8
28 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3
29 */
sk_int_mod(int x,int n)30 static inline int sk_int_mod(int x, int n) {
31 SkASSERT(n > 0);
32 if ((unsigned)x >= (unsigned)n) {
33 if (x < 0) {
34 x = n + ~(~x % n);
35 } else {
36 x = x % n;
37 }
38 }
39 return x;
40 }
41
// Forward declarations of the decal helpers that are defined further down in
// this file. They are declared ahead of the SkBitmapProcState_matrix_template.h
// include that follows -- presumably because that header calls them (TODO confirm).
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
44
45 #include "SkBitmapProcState_matrix_template.h"
46
47 ///////////////////////////////////////////////////////////////////////////////
48
// Compile neon code paths if needed
#if !SK_ARM_NEON_IS_NONE

// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
// They are the NEON counterparts of the ClampX_ClampY_Procs / RepeatX_RepeatY_Procs
// tables in this file; chooseMatrixProc() picks between the two flavours through
// SK_ARM_NEON_WRAP.
extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];

#endif // !SK_ARM_NEON_IS_NONE
57
58 // Compile non-neon code path if needed
59 #if !SK_ARM_NEON_IS_ALWAYS
// Configure and include SkBitmapProcState_matrix.h for clamp/clamp tiling.
// MAKENAME gives the generated functions their ClampX_ClampY prefix; the
// ClampX_ClampY_filter_* procs referenced in the table below come from this include.
#define MAKENAME(suffix) ClampX_ClampY ## suffix
// Integer tile coordinate: the integer part of the fixed-point value (>> 16)
// passed through SkClampMax together with max.
#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
// Low bits: the top 4 bits of the 16-bit fraction (bits 12..15 of the fixed value).
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
// Only the clamp instantiation defines CHECK_FOR_DECAL -- presumably this enables
// the decal fast path inside the included header (TODO confirm against the header).
#define CHECK_FOR_DECAL
#include "SkBitmapProcState_matrix.h"
67
68 struct ClampTileProcs {
XClampTileProcs69 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
70 return SkClampMax(fx >> 16, max);
71 }
YClampTileProcs72 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
73 return SkClampMax(fy >> 16, max);
74 }
75 };
76
77 // Referenced in opts_check_x86.cpp
ClampX_ClampY_nofilter_scale(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)78 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
79 int count, int x, int y) {
80 return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
81 }
ClampX_ClampY_nofilter_affine(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)82 void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
83 int count, int x, int y) {
84 return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
85 }
86
87 static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
88 // only clamp lives in the right coord space to check for decal
89 ClampX_ClampY_nofilter_scale,
90 ClampX_ClampY_filter_scale,
91 ClampX_ClampY_nofilter_affine,
92 ClampX_ClampY_filter_affine,
93 NoFilterProc_Persp<ClampTileProcs>,
94 ClampX_ClampY_filter_persp
95 };
96
// Configure and include SkBitmapProcState_matrix.h a second time, now for
// repeat/repeat tiling. The RepeatX_RepeatY_filter_* procs referenced in the
// table below come from this include.
#define MAKENAME(suffix) RepeatX_RepeatY ## suffix
// Integer tile coordinate: keep only the 16-bit fraction of the fixed value,
// scale it by (max + 1), and apply SK_USHIFT16 to the product.
#define TILEX_PROCF(fx, max) SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max) SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1))
// Low bits: bits 12..15 of the same scaled product.
#define TILEX_LOW_BITS(fx, max) (((unsigned)((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((unsigned)((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
103
104 struct RepeatTileProcs {
XRepeatTileProcs105 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
106 SkASSERT(max < 65535);
107 return SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1));
108 }
YRepeatTileProcs109 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
110 SkASSERT(max < 65535);
111 return SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1));
112 }
113 };
114
115 static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
116 NoFilterProc_Scale<RepeatTileProcs, false>,
117 RepeatX_RepeatY_filter_scale,
118 NoFilterProc_Affine<RepeatTileProcs>,
119 RepeatX_RepeatY_filter_affine,
120 NoFilterProc_Persp<RepeatTileProcs>,
121 RepeatX_RepeatY_filter_persp
122 };
123 #endif
124
// Configure and include SkBitmapProcState_matrix.h a third time for the general
// case, where the tile behaviour is chosen at run time through the function
// pointers stored on the SkBitmapProcState (fTileProcX/Y, fTileLowBitsProcX/Y).
#define MAKENAME(suffix) GeneralXY ## suffix
// PREAMBLE copies the four tile procs from the state into locals; the (void)
// casts mark each local as used so that procs which need only some of them do
// not produce unused-variable warnings.
#define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
    SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
    SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
    SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
// Extra parameter / argument lists (note the leading comma) that carry those
// locals along -- presumably into helper functions declared by the included
// header (TODO confirm against the header).
#define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
#define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
#define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX
#define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY
// Integer tile coordinate: the tile proc's result scaled by (max + 1), then SK_USHIFT16.
#define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1))
#define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1))
// Low bits: delegated to the low-bits proc, which receives the raw fixed value
// and (max + 1) as its scale.
#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
#include "SkBitmapProcState_matrix.h"
139
140 struct GeneralTileProcs {
XGeneralTileProcs141 static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
142 return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
143 }
YGeneralTileProcs144 static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
145 return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
146 }
147 };
148
149 static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
150 NoFilterProc_Scale<GeneralTileProcs, false>,
151 GeneralXY_filter_scale,
152 NoFilterProc_Affine<GeneralTileProcs>,
153 GeneralXY_filter_affine,
154 NoFilterProc_Persp<GeneralTileProcs>,
155 GeneralXY_filter_persp
156 };
157
158 ///////////////////////////////////////////////////////////////////////////////
159
fixed_clamp(SkFixed x)160 static inline U16CPU fixed_clamp(SkFixed x) {
161 if (x < 0) {
162 x = 0;
163 }
164 if (x >> 16) {
165 x = 0xFFFF;
166 }
167 return x;
168 }
169
fixed_repeat(SkFixed x)170 static inline U16CPU fixed_repeat(SkFixed x) {
171 return x & 0xFFFF;
172 }
173
// Visual Studio 2010 (_MSC_VER=1600) optimizes bit-shift code incorrectly.
// See http://code.google.com/p/skia/issues/detail?id=472
// Note that the guard below applies to 1600 and every later _MSC_VER, so
// optimization is switched off around fixed_mirror on all of those compilers.
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
#pragma optimize("", off)
#endif

// Mirror tile proc for fixed-point coordinates: returns the low 16 bits of x,
// bit-inverted first when x lies on an odd interval, so the result runs
// backwards on every other interval.
static inline U16CPU fixed_mirror(SkFixed x) {
    // Assuming SkLeftShift(x, 15) is x << 15, this moves bit 16 of x (the low
    // bit of the integer part) into the sign bit; the arithmetic >> 31 then
    // smears it across the whole word.
    SkFixed s = SkLeftShift(x, 15) >> 31;
    // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
    return (x ^ s) & 0xFFFF;
}

// Restore the optimization settings for the rest of the file.
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
#pragma optimize("", on)
#endif
189
choose_tile_proc(unsigned m)190 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
191 if (SkShader::kClamp_TileMode == m) {
192 return fixed_clamp;
193 }
194 if (SkShader::kRepeat_TileMode == m) {
195 return fixed_repeat;
196 }
197 SkASSERT(SkShader::kMirror_TileMode == m);
198 return fixed_mirror;
199 }
200
fixed_clamp_lowbits(SkFixed x,int)201 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
202 return (x >> 12) & 0xF;
203 }
204
fixed_repeat_or_mirrow_lowbits(SkFixed x,int scale)205 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
206 return ((x * scale) >> 12) & 0xF;
207 }
208
choose_tile_lowbits_proc(unsigned m)209 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
210 if (SkShader::kClamp_TileMode == m) {
211 return fixed_clamp_lowbits;
212 } else {
213 SkASSERT(SkShader::kMirror_TileMode == m ||
214 SkShader::kRepeat_TileMode == m);
215 // mirror and repeat have the same behavior for the low bits.
216 return fixed_repeat_or_mirrow_lowbits;
217 }
218 }
219
int_clamp(int x,int n)220 static inline U16CPU int_clamp(int x, int n) {
221 if (x >= n) {
222 x = n - 1;
223 }
224 if (x < 0) {
225 x = 0;
226 }
227 return x;
228 }
229
int_repeat(int x,int n)230 static inline U16CPU int_repeat(int x, int n) {
231 return sk_int_mod(x, n);
232 }
233
int_mirror(int x,int n)234 static inline U16CPU int_mirror(int x, int n) {
235 x = sk_int_mod(x, 2 * n);
236 if (x >= n) {
237 x = n + ~(x - n);
238 }
239 return x;
240 }
241
// Disabled debugging aid (compiled out by the #if 0): prints int_mirror(i, 3)
// for every i in [-8, 8]. The matching call in chooseMatrixProc() is commented
// out as well.
#if 0
static void test_int_tileprocs() {
    for (int i = -8; i <= 8; i++) {
        SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
    }
}
#endif
249
choose_int_tile_proc(unsigned tm)250 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
251 if (SkShader::kClamp_TileMode == tm)
252 return int_clamp;
253 if (SkShader::kRepeat_TileMode == tm)
254 return int_repeat;
255 SkASSERT(SkShader::kMirror_TileMode == tm);
256 return int_mirror;
257 }
258
259 //////////////////////////////////////////////////////////////////////////////
260
decal_nofilter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)261 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
262 int i;
263
264 for (i = (count >> 2); i > 0; --i) {
265 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
266 fx += dx+dx;
267 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
268 fx += dx+dx;
269 }
270 count &= 3;
271
272 uint16_t* xx = (uint16_t*)dst;
273 for (i = count; i > 0; --i) {
274 *xx++ = SkToU16(fx >> 16); fx += dx;
275 }
276 }
277
decal_filter_scale(uint32_t dst[],SkFixed fx,SkFixed dx,int count)278 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
279 if (count & 1) {
280 SkASSERT((fx >> (16 + 14)) == 0);
281 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
282 fx += dx;
283 }
284 while ((count -= 2) >= 0) {
285 SkASSERT((fx >> (16 + 14)) == 0);
286 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
287 fx += dx;
288
289 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
290 fx += dx;
291 }
292 }
293
294 ///////////////////////////////////////////////////////////////////////////////
295 // stores the same as SCALE, but is cheaper to compute. Also since there is no
296 // scale, we don't need/have a FILTER version
297
// Writes the `count` consecutive values start, start + 1, ... into
// xptr[0 .. count). Each value is truncated to 16 bits when stored. A count of
// zero (or less) writes nothing, whatever the alignment of xptr.
//
// This is deliberately a plain loop: it needs no alignment prologue (so it can
// never store past an empty range) and no uint16_t/uint32_t pointer punning.
// NOTE(review): this relies on the compiler vectorizing the loop to match the
// old hand-packed 32-bit stores -- confirm on the targets that matter.
static void fill_sequential(uint16_t xptr[], int start, int count) {
    for (int i = 0; i < count; ++i) {
        xptr[i] = static_cast<uint16_t>(start + i);
    }
}
328
nofilter_trans_preamble(const SkBitmapProcState & s,uint32_t ** xy,int x,int y)329 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
330 int x, int y) {
331 const SkBitmapProcStateAutoMapper mapper(s, x, y);
332 **xy = s.fIntTileProcY(mapper.intY(), s.fPixmap.height());
333 *xy += 1; // bump the ptr
334 // return our starting X position
335 return mapper.intX();
336 }
337
clampx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)338 static void clampx_nofilter_trans(const SkBitmapProcState& s,
339 uint32_t xy[], int count, int x, int y) {
340 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
341
342 int xpos = nofilter_trans_preamble(s, &xy, x, y);
343 const int width = s.fPixmap.width();
344 if (1 == width) {
345 // all of the following X values must be 0
346 memset(xy, 0, count * sizeof(uint16_t));
347 return;
348 }
349
350 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
351 int n;
352
353 // fill before 0 as needed
354 if (xpos < 0) {
355 n = -xpos;
356 if (n > count) {
357 n = count;
358 }
359 memset(xptr, 0, n * sizeof(uint16_t));
360 count -= n;
361 if (0 == count) {
362 return;
363 }
364 xptr += n;
365 xpos = 0;
366 }
367
368 // fill in 0..width-1 if needed
369 if (xpos < width) {
370 n = width - xpos;
371 if (n > count) {
372 n = count;
373 }
374 fill_sequential(xptr, xpos, n);
375 count -= n;
376 if (0 == count) {
377 return;
378 }
379 xptr += n;
380 }
381
382 // fill the remaining with the max value
383 sk_memset16(xptr, width - 1, count);
384 }
385
repeatx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)386 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
387 uint32_t xy[], int count, int x, int y) {
388 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
389
390 int xpos = nofilter_trans_preamble(s, &xy, x, y);
391 const int width = s.fPixmap.width();
392 if (1 == width) {
393 // all of the following X values must be 0
394 memset(xy, 0, count * sizeof(uint16_t));
395 return;
396 }
397
398 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
399 int start = sk_int_mod(xpos, width);
400 int n = width - start;
401 if (n > count) {
402 n = count;
403 }
404 fill_sequential(xptr, start, n);
405 xptr += n;
406 count -= n;
407
408 while (count >= width) {
409 fill_sequential(xptr, 0, width);
410 xptr += width;
411 count -= width;
412 }
413
414 if (count > 0) {
415 fill_sequential(xptr, 0, count);
416 }
417 }
418
fill_backwards(uint16_t xptr[],int pos,int count)419 static void fill_backwards(uint16_t xptr[], int pos, int count) {
420 for (int i = 0; i < count; i++) {
421 SkASSERT(pos >= 0);
422 xptr[i] = pos--;
423 }
424 }
425
mirrorx_nofilter_trans(const SkBitmapProcState & s,uint32_t xy[],int count,int x,int y)426 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
427 uint32_t xy[], int count, int x, int y) {
428 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
429
430 int xpos = nofilter_trans_preamble(s, &xy, x, y);
431 const int width = s.fPixmap.width();
432 if (1 == width) {
433 // all of the following X values must be 0
434 memset(xy, 0, count * sizeof(uint16_t));
435 return;
436 }
437
438 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
439 // need to know our start, and our initial phase (forward or backward)
440 bool forward;
441 int n;
442 int start = sk_int_mod(xpos, 2 * width);
443 if (start >= width) {
444 start = width + ~(start - width);
445 forward = false;
446 n = start + 1; // [start .. 0]
447 } else {
448 forward = true;
449 n = width - start; // [start .. width)
450 }
451 if (n > count) {
452 n = count;
453 }
454 if (forward) {
455 fill_sequential(xptr, start, n);
456 } else {
457 fill_backwards(xptr, start, n);
458 }
459 forward = !forward;
460 xptr += n;
461 count -= n;
462
463 while (count >= width) {
464 if (forward) {
465 fill_sequential(xptr, 0, width);
466 } else {
467 fill_backwards(xptr, width - 1, width);
468 }
469 forward = !forward;
470 xptr += width;
471 count -= width;
472 }
473
474 if (count > 0) {
475 if (forward) {
476 fill_sequential(xptr, 0, count);
477 } else {
478 fill_backwards(xptr, width - 1, count);
479 }
480 }
481 }
482
483 ///////////////////////////////////////////////////////////////////////////////
484
chooseMatrixProc(bool trivial_matrix)485 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
486 // test_int_tileprocs();
487 // check for our special case when there is no scale/affine/perspective
488 if (trivial_matrix && kNone_SkFilterQuality == fFilterLevel) {
489 fIntTileProcY = choose_int_tile_proc(fTileModeY);
490 switch (fTileModeX) {
491 case SkShader::kClamp_TileMode:
492 return clampx_nofilter_trans;
493 case SkShader::kRepeat_TileMode:
494 return repeatx_nofilter_trans;
495 case SkShader::kMirror_TileMode:
496 return mirrorx_nofilter_trans;
497 }
498 }
499
500 int index = 0;
501 if (fFilterLevel != kNone_SkFilterQuality) {
502 index = 1;
503 }
504 if (fInvType & SkMatrix::kPerspective_Mask) {
505 index += 4;
506 } else if (fInvType & SkMatrix::kAffine_Mask) {
507 index += 2;
508 }
509
510 if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
511 // clamp gets special version of filterOne
512 fFilterOneX = SK_Fixed1;
513 fFilterOneY = SK_Fixed1;
514 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
515 }
516
517 // all remaining procs use this form for filterOne
518 fFilterOneX = SK_Fixed1 / fPixmap.width();
519 fFilterOneY = SK_Fixed1 / fPixmap.height();
520
521 if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
522 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
523 }
524
525 fTileProcX = choose_tile_proc(fTileModeX);
526 fTileProcY = choose_tile_proc(fTileModeY);
527 fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
528 fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
529 return GeneralXY_Procs[index];
530 }
531