1 /*
2  * Copyright 2006 The Android Open Source Project
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 
9 #include "SkBlurMask.h"
10 #include "SkMath.h"
11 #include "SkTemplates.h"
12 #include "SkEndian.h"
13 
14 
// This constant (1 / sqrt(3)) approximates the scaling done in the software
// path's "high quality" mode, in SkBlurMask::Blur().
// IMHO, it actually should be 1: we blur "less" than the CSS and canvas specs
// call for, simply because Safari does the same. Firefox used to do the same
// too, until 4.0, where they fixed it. So at some point we should probably get
// rid of these scaling constants and rebaseline all the blur tests.
22 static const SkScalar kBLUR_SIGMA_SCALE = 0.57735f;
23 
ConvertRadiusToSigma(SkScalar radius)24 SkScalar SkBlurMask::ConvertRadiusToSigma(SkScalar radius) {
25     return radius > 0 ? kBLUR_SIGMA_SCALE * radius + 0.5f : 0.0f;
26 }
27 
ConvertSigmaToRadius(SkScalar sigma)28 SkScalar SkBlurMask::ConvertSigmaToRadius(SkScalar sigma) {
29     return sigma > 0.5f ? (sigma - 0.5f) / kBLUR_SIGMA_SCALE : 0.0f;
30 }
31 
32 #define UNROLL_SEPARABLE_LOOPS
33 
34 /**
35  * This function performs a box blur in X, of the given radius.  If the
36  * "transpose" parameter is true, it will transpose the pixels on write,
37  * such that X and Y are swapped. Reads are always performed from contiguous
38  * memory in X, for speed. The destination buffer (dst) must be at least
39  * (width + leftRadius + rightRadius) * height bytes in size.
40  *
41  * This is what the inner loop looks like before unrolling, and with the two
42  * cases broken out separately (width < diameter, width >= diameter):
43  *
44  *      if (width < diameter) {
45  *          for (int x = 0; x < width; ++x) {
46  *              sum += *right++;
47  *              *dptr = (sum * scale + half) >> 24;
48  *              dptr += dst_x_stride;
49  *          }
50  *          for (int x = width; x < diameter; ++x) {
51  *              *dptr = (sum * scale + half) >> 24;
52  *              dptr += dst_x_stride;
53  *          }
54  *          for (int x = 0; x < width; ++x) {
55  *              *dptr = (sum * scale + half) >> 24;
56  *              sum -= *left++;
57  *              dptr += dst_x_stride;
58  *          }
59  *      } else {
60  *          for (int x = 0; x < diameter; ++x) {
61  *              sum += *right++;
62  *              *dptr = (sum * scale + half) >> 24;
63  *              dptr += dst_x_stride;
64  *          }
65  *          for (int x = diameter; x < width; ++x) {
66  *              sum += *right++;
67  *              *dptr = (sum * scale + half) >> 24;
68  *              sum -= *left++;
69  *              dptr += dst_x_stride;
70  *          }
71  *          for (int x = 0; x < diameter; ++x) {
72  *              *dptr = (sum * scale + half) >> 24;
73  *              sum -= *left++;
74  *              dptr += dst_x_stride;
75  *          }
76  *      }
77  */
boxBlur(const uint8_t * src,int src_y_stride,uint8_t * dst,int leftRadius,int rightRadius,int width,int height,bool transpose)78 static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
79                    int leftRadius, int rightRadius, int width, int height,
80                    bool transpose)
81 {
82     int diameter = leftRadius + rightRadius;
83     int kernelSize = diameter + 1;
84     int border = SkMin32(width, diameter);
85     uint32_t scale = (1 << 24) / kernelSize;
86     int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
87     int dst_x_stride = transpose ? height : 1;
88     int dst_y_stride = transpose ? 1 : new_width;
89     uint32_t half = 1 << 23;
90     for (int y = 0; y < height; ++y) {
91         uint32_t sum = 0;
92         uint8_t* dptr = dst + y * dst_y_stride;
93         const uint8_t* right = src + y * src_y_stride;
94         const uint8_t* left = right;
95         for (int x = 0; x < rightRadius - leftRadius; x++) {
96             *dptr = 0;
97             dptr += dst_x_stride;
98         }
99 #define LEFT_BORDER_ITER \
100             sum += *right++; \
101             *dptr = (sum * scale + half) >> 24; \
102             dptr += dst_x_stride;
103 
104         int x = 0;
105 #ifdef UNROLL_SEPARABLE_LOOPS
106         for (; x < border - 16; x += 16) {
107             LEFT_BORDER_ITER
108             LEFT_BORDER_ITER
109             LEFT_BORDER_ITER
110             LEFT_BORDER_ITER
111             LEFT_BORDER_ITER
112             LEFT_BORDER_ITER
113             LEFT_BORDER_ITER
114             LEFT_BORDER_ITER
115             LEFT_BORDER_ITER
116             LEFT_BORDER_ITER
117             LEFT_BORDER_ITER
118             LEFT_BORDER_ITER
119             LEFT_BORDER_ITER
120             LEFT_BORDER_ITER
121             LEFT_BORDER_ITER
122             LEFT_BORDER_ITER
123         }
124 #endif
125         for (; x < border; ++x) {
126             LEFT_BORDER_ITER
127         }
128 #undef LEFT_BORDER_ITER
129 #define TRIVIAL_ITER \
130             *dptr = (sum * scale + half) >> 24; \
131             dptr += dst_x_stride;
132         x = width;
133 #ifdef UNROLL_SEPARABLE_LOOPS
134         for (; x < diameter - 16; x += 16) {
135             TRIVIAL_ITER
136             TRIVIAL_ITER
137             TRIVIAL_ITER
138             TRIVIAL_ITER
139             TRIVIAL_ITER
140             TRIVIAL_ITER
141             TRIVIAL_ITER
142             TRIVIAL_ITER
143             TRIVIAL_ITER
144             TRIVIAL_ITER
145             TRIVIAL_ITER
146             TRIVIAL_ITER
147             TRIVIAL_ITER
148             TRIVIAL_ITER
149             TRIVIAL_ITER
150             TRIVIAL_ITER
151         }
152 #endif
153         for (; x < diameter; ++x) {
154             TRIVIAL_ITER
155         }
156 #undef TRIVIAL_ITER
157 #define CENTER_ITER \
158             sum += *right++; \
159             *dptr = (sum * scale + half) >> 24; \
160             sum -= *left++; \
161             dptr += dst_x_stride;
162 
163         x = diameter;
164 #ifdef UNROLL_SEPARABLE_LOOPS
165         for (; x < width - 16; x += 16) {
166             CENTER_ITER
167             CENTER_ITER
168             CENTER_ITER
169             CENTER_ITER
170             CENTER_ITER
171             CENTER_ITER
172             CENTER_ITER
173             CENTER_ITER
174             CENTER_ITER
175             CENTER_ITER
176             CENTER_ITER
177             CENTER_ITER
178             CENTER_ITER
179             CENTER_ITER
180             CENTER_ITER
181             CENTER_ITER
182         }
183 #endif
184         for (; x < width; ++x) {
185             CENTER_ITER
186         }
187 #undef CENTER_ITER
188 #define RIGHT_BORDER_ITER \
189             *dptr = (sum * scale + half) >> 24; \
190             sum -= *left++; \
191             dptr += dst_x_stride;
192 
193         x = 0;
194 #ifdef UNROLL_SEPARABLE_LOOPS
195         for (; x < border - 16; x += 16) {
196             RIGHT_BORDER_ITER
197             RIGHT_BORDER_ITER
198             RIGHT_BORDER_ITER
199             RIGHT_BORDER_ITER
200             RIGHT_BORDER_ITER
201             RIGHT_BORDER_ITER
202             RIGHT_BORDER_ITER
203             RIGHT_BORDER_ITER
204             RIGHT_BORDER_ITER
205             RIGHT_BORDER_ITER
206             RIGHT_BORDER_ITER
207             RIGHT_BORDER_ITER
208             RIGHT_BORDER_ITER
209             RIGHT_BORDER_ITER
210             RIGHT_BORDER_ITER
211             RIGHT_BORDER_ITER
212         }
213 #endif
214         for (; x < border; ++x) {
215             RIGHT_BORDER_ITER
216         }
217 #undef RIGHT_BORDER_ITER
218         for (int x = 0; x < leftRadius - rightRadius; ++x) {
219             *dptr = 0;
220             dptr += dst_x_stride;
221         }
222         SkASSERT(sum == 0);
223     }
224     return new_width;
225 }
226 
227 /**
228  * This variant of the box blur handles blurring of non-integer radii.  It
229  * keeps two running sums: an outer sum for the rounded-up kernel radius, and
230  * an inner sum for the rounded-down kernel radius.  For each pixel, it linearly
231  * interpolates between them.  In float this would be:
232  *  outer_weight * outer_sum / kernelSize +
233  *  (1.0 - outer_weight) * innerSum / (kernelSize - 2)
234  *
235  * This is what the inner loop looks like before unrolling, and with the two
236  * cases broken out separately (width < diameter, width >= diameter):
237  *
238  *      if (width < diameter) {
239  *          for (int x = 0; x < width; x++) {
240  *              inner_sum = outer_sum;
241  *              outer_sum += *right++;
242  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
243  *              dptr += dst_x_stride;
244  *          }
245  *          for (int x = width; x < diameter; ++x) {
246  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
247  *              dptr += dst_x_stride;
248  *          }
249  *          for (int x = 0; x < width; x++) {
250  *              inner_sum = outer_sum - *left++;
251  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
252  *              dptr += dst_x_stride;
253  *              outer_sum = inner_sum;
254  *          }
255  *      } else {
256  *          for (int x = 0; x < diameter; x++) {
257  *              inner_sum = outer_sum;
258  *              outer_sum += *right++;
259  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
260  *              dptr += dst_x_stride;
261  *          }
262  *          for (int x = diameter; x < width; ++x) {
263  *              inner_sum = outer_sum - *left;
264  *              outer_sum += *right++;
265  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
266  *              dptr += dst_x_stride;
267  *              outer_sum -= *left++;
268  *          }
269  *          for (int x = 0; x < diameter; x++) {
270  *              inner_sum = outer_sum - *left++;
271  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
272  *              dptr += dst_x_stride;
273  *              outer_sum = inner_sum;
274  *          }
275  *      }
276  *  }
277  *  return new_width;
278  */
279 
boxBlurInterp(const uint8_t * src,int src_y_stride,uint8_t * dst,int radius,int width,int height,bool transpose,uint8_t outer_weight)280 static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
281                          int radius, int width, int height,
282                          bool transpose, uint8_t outer_weight)
283 {
284     int diameter = radius * 2;
285     int kernelSize = diameter + 1;
286     int border = SkMin32(width, diameter);
287     int inner_weight = 255 - outer_weight;
288     outer_weight += outer_weight >> 7;
289     inner_weight += inner_weight >> 7;
290     uint32_t outer_scale = (outer_weight << 16) / kernelSize;
291     uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
292     uint32_t half = 1 << 23;
293     int new_width = width + diameter;
294     int dst_x_stride = transpose ? height : 1;
295     int dst_y_stride = transpose ? 1 : new_width;
296     for (int y = 0; y < height; ++y) {
297         uint32_t outer_sum = 0, inner_sum = 0;
298         uint8_t* dptr = dst + y * dst_y_stride;
299         const uint8_t* right = src + y * src_y_stride;
300         const uint8_t* left = right;
301         int x = 0;
302 
303 #define LEFT_BORDER_ITER \
304             inner_sum = outer_sum; \
305             outer_sum += *right++; \
306             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
307             dptr += dst_x_stride;
308 
309 #ifdef UNROLL_SEPARABLE_LOOPS
310         for (;x < border - 16; x += 16) {
311             LEFT_BORDER_ITER
312             LEFT_BORDER_ITER
313             LEFT_BORDER_ITER
314             LEFT_BORDER_ITER
315             LEFT_BORDER_ITER
316             LEFT_BORDER_ITER
317             LEFT_BORDER_ITER
318             LEFT_BORDER_ITER
319             LEFT_BORDER_ITER
320             LEFT_BORDER_ITER
321             LEFT_BORDER_ITER
322             LEFT_BORDER_ITER
323             LEFT_BORDER_ITER
324             LEFT_BORDER_ITER
325             LEFT_BORDER_ITER
326             LEFT_BORDER_ITER
327         }
328 #endif
329 
330         for (;x < border; ++x) {
331             LEFT_BORDER_ITER
332         }
333 #undef LEFT_BORDER_ITER
334         for (int x = width; x < diameter; ++x) {
335             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
336             dptr += dst_x_stride;
337         }
338         x = diameter;
339 
340 #define CENTER_ITER \
341             inner_sum = outer_sum - *left; \
342             outer_sum += *right++; \
343             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
344             dptr += dst_x_stride; \
345             outer_sum -= *left++;
346 
347 #ifdef UNROLL_SEPARABLE_LOOPS
348         for (; x < width - 16; x += 16) {
349             CENTER_ITER
350             CENTER_ITER
351             CENTER_ITER
352             CENTER_ITER
353             CENTER_ITER
354             CENTER_ITER
355             CENTER_ITER
356             CENTER_ITER
357             CENTER_ITER
358             CENTER_ITER
359             CENTER_ITER
360             CENTER_ITER
361             CENTER_ITER
362             CENTER_ITER
363             CENTER_ITER
364             CENTER_ITER
365         }
366 #endif
367         for (; x < width; ++x) {
368             CENTER_ITER
369         }
370 #undef CENTER_ITER
371 
372         #define RIGHT_BORDER_ITER \
373             inner_sum = outer_sum - *left++; \
374             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
375             dptr += dst_x_stride; \
376             outer_sum = inner_sum;
377 
378         x = 0;
379 #ifdef UNROLL_SEPARABLE_LOOPS
380         for (; x < border - 16; x += 16) {
381             RIGHT_BORDER_ITER
382             RIGHT_BORDER_ITER
383             RIGHT_BORDER_ITER
384             RIGHT_BORDER_ITER
385             RIGHT_BORDER_ITER
386             RIGHT_BORDER_ITER
387             RIGHT_BORDER_ITER
388             RIGHT_BORDER_ITER
389             RIGHT_BORDER_ITER
390             RIGHT_BORDER_ITER
391             RIGHT_BORDER_ITER
392             RIGHT_BORDER_ITER
393             RIGHT_BORDER_ITER
394             RIGHT_BORDER_ITER
395             RIGHT_BORDER_ITER
396             RIGHT_BORDER_ITER
397         }
398 #endif
399         for (; x < border; ++x) {
400             RIGHT_BORDER_ITER
401         }
402 #undef RIGHT_BORDER_ITER
403         SkASSERT(outer_sum == 0 && inner_sum == 0);
404     }
405     return new_width;
406 }
407 
get_adjusted_radii(SkScalar passRadius,int * loRadius,int * hiRadius)408 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
409 {
410     *loRadius = *hiRadius = SkScalarCeilToInt(passRadius);
411     if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) {
412         *loRadius = *hiRadius - 1;
413     }
414 }
415 
416 #include "SkColorPriv.h"
417 
merge_src_with_blur(uint8_t dst[],int dstRB,const uint8_t src[],int srcRB,const uint8_t blur[],int blurRB,int sw,int sh)418 static void merge_src_with_blur(uint8_t dst[], int dstRB,
419                                 const uint8_t src[], int srcRB,
420                                 const uint8_t blur[], int blurRB,
421                                 int sw, int sh) {
422     dstRB -= sw;
423     srcRB -= sw;
424     blurRB -= sw;
425     while (--sh >= 0) {
426         for (int x = sw - 1; x >= 0; --x) {
427             *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
428             dst += 1;
429             src += 1;
430             blur += 1;
431         }
432         dst += dstRB;
433         src += srcRB;
434         blur += blurRB;
435     }
436 }
437 
clamp_with_orig(uint8_t dst[],int dstRowBytes,const uint8_t src[],int srcRowBytes,int sw,int sh,SkBlurStyle style)438 static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
439                             const uint8_t src[], int srcRowBytes,
440                             int sw, int sh,
441                             SkBlurStyle style) {
442     int x;
443     while (--sh >= 0) {
444         switch (style) {
445         case kSolid_SkBlurStyle:
446             for (x = sw - 1; x >= 0; --x) {
447                 int s = *src;
448                 int d = *dst;
449                 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
450                 dst += 1;
451                 src += 1;
452             }
453             break;
454         case kOuter_SkBlurStyle:
455             for (x = sw - 1; x >= 0; --x) {
456                 if (*src) {
457                     *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
458                 }
459                 dst += 1;
460                 src += 1;
461             }
462             break;
463         default:
464             SkDEBUGFAIL("Unexpected blur style here");
465             break;
466         }
467         dst += dstRowBytes - sw;
468         src += srcRowBytes - sw;
469     }
470 }
471 
472 ///////////////////////////////////////////////////////////////////////////////
473 
474 // we use a local function to wrap the class static method to work around
475 // a bug in gcc98
476 void SkMask_FreeImage(uint8_t* image);
SkMask_FreeImage(uint8_t * image)477 void SkMask_FreeImage(uint8_t* image) {
478     SkMask::FreeImage(image);
479 }
480 
BoxBlur(SkMask * dst,const SkMask & src,SkScalar sigma,SkBlurStyle style,SkBlurQuality quality,SkIPoint * margin,bool force_quality)481 bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src,
482                          SkScalar sigma, SkBlurStyle style, SkBlurQuality quality,
483                          SkIPoint* margin, bool force_quality) {
484 
485     if (src.fFormat != SkMask::kA8_Format) {
486         return false;
487     }
488 
489     // Force high quality off for small radii (performance)
490     if (!force_quality && sigma <= SkIntToScalar(2)) {
491         quality = kLow_SkBlurQuality;
492     }
493 
494     SkScalar passRadius;
495     if (kHigh_SkBlurQuality == quality) {
496         // For the high quality path the 3 pass box blur kernel width is
497         // 6*rad+1 while the full Gaussian width is 6*sigma.
498         passRadius = sigma - (1/6.0f);
499     } else {
500         // For the low quality path we only attempt to cover 3*sigma of the
501         // Gaussian blur area (1.5*sigma on each side). The single pass box
502         // blur's kernel size is 2*rad+1.
503         passRadius = 1.5f*sigma - 0.5f;
504     }
505 
506     // highQuality: use three box blur passes as a cheap way
507     // to approximate a Gaussian blur
508     int passCount = (kHigh_SkBlurQuality == quality) ? 3 : 1;
509 
510     int rx = SkScalarCeilToInt(passRadius);
511     int outerWeight = 255 - SkScalarRoundToInt((SkIntToScalar(rx) - passRadius) * 255);
512 
513     SkASSERT(rx >= 0);
514     SkASSERT((unsigned)outerWeight <= 255);
515     if (rx <= 0) {
516         return false;
517     }
518 
519     int ry = rx;    // only do square blur for now
520 
521     int padx = passCount * rx;
522     int pady = passCount * ry;
523 
524     if (margin) {
525         margin->set(padx, pady);
526     }
527     dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
528                      src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
529 
530     dst->fRowBytes = dst->fBounds.width();
531     dst->fFormat = SkMask::kA8_Format;
532     dst->fImage = nullptr;
533 
534     if (src.fImage) {
535         size_t dstSize = dst->computeImageSize();
536         if (0 == dstSize) {
537             return false;   // too big to allocate, abort
538         }
539 
540         int             sw = src.fBounds.width();
541         int             sh = src.fBounds.height();
542         const uint8_t*  sp = src.fImage;
543         uint8_t*        dp = SkMask::AllocImage(dstSize);
544         SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
545 
546         // build the blurry destination
547         SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
548         uint8_t*                tp = tmpBuffer.get();
549         int w = sw, h = sh;
550 
551         if (outerWeight == 255) {
552             int loRadius, hiRadius;
553             get_adjusted_radii(passRadius, &loRadius, &hiRadius);
554             if (kHigh_SkBlurQuality == quality) {
555                 // Do three X blurs, with a transpose on the final one.
556                 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
557                 w = boxBlur(tp, w,             dp, hiRadius, loRadius, w, h, false);
558                 w = boxBlur(dp, w,             tp, hiRadius, hiRadius, w, h, true);
559                 // Do three Y blurs, with a transpose on the final one.
560                 h = boxBlur(tp, h,             dp, loRadius, hiRadius, h, w, false);
561                 h = boxBlur(dp, h,             tp, hiRadius, loRadius, h, w, false);
562                 h = boxBlur(tp, h,             dp, hiRadius, hiRadius, h, w, true);
563             } else {
564                 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
565                 h = boxBlur(tp, h,             dp, ry, ry, h, w, true);
566             }
567         } else {
568             if (kHigh_SkBlurQuality == quality) {
569                 // Do three X blurs, with a transpose on the final one.
570                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
571                 w = boxBlurInterp(tp, w,             dp, rx, w, h, false, outerWeight);
572                 w = boxBlurInterp(dp, w,             tp, rx, w, h, true, outerWeight);
573                 // Do three Y blurs, with a transpose on the final one.
574                 h = boxBlurInterp(tp, h,             dp, ry, h, w, false, outerWeight);
575                 h = boxBlurInterp(dp, h,             tp, ry, h, w, false, outerWeight);
576                 h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
577             } else {
578                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
579                 h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
580             }
581         }
582 
583         dst->fImage = dp;
584         // if need be, alloc the "real" dst (same size as src) and copy/merge
585         // the blur into it (applying the src)
586         if (style == kInner_SkBlurStyle) {
587             // now we allocate the "real" dst, mirror the size of src
588             size_t srcSize = src.computeImageSize();
589             if (0 == srcSize) {
590                 return false;   // too big to allocate, abort
591             }
592             dst->fImage = SkMask::AllocImage(srcSize);
593             merge_src_with_blur(dst->fImage, src.fRowBytes,
594                                 sp, src.fRowBytes,
595                                 dp + passCount * (rx + ry * dst->fRowBytes),
596                                 dst->fRowBytes, sw, sh);
597             SkMask::FreeImage(dp);
598         } else if (style != kNormal_SkBlurStyle) {
599             clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
600                             dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
601         }
602         (void)autoCall.release();
603     }
604 
605     if (style == kInner_SkBlurStyle) {
606         dst->fBounds = src.fBounds; // restore trimmed bounds
607         dst->fRowBytes = src.fRowBytes;
608     }
609 
610     return true;
611 }
612 
613 /* Convolving a box with itself three times results in a piecewise
614    quadratic function:
615 
616    0                              x <= -1.5
617    9/8 + 3/2 x + 1/2 x^2   -1.5 < x <= -.5
618    3/4 - x^2                -.5 < x <= .5
619    9/8 - 3/2 x + 1/2 x^2    0.5 < x <= 1.5
620    0                        1.5 < x
621 
622    Mathematica:
623 
624    g[x_] := Piecewise [ {
625      {9/8 + 3/2 x + 1/2 x^2 ,  -1.5 < x <= -.5},
626      {3/4 - x^2             ,   -.5 < x <= .5},
627      {9/8 - 3/2 x + 1/2 x^2 ,   0.5 < x <= 1.5}
628    }, 0]
629 
630    To get the profile curve of the blurred step function at the rectangle
631    edge, we evaluate the indefinite integral, which is piecewise cubic:
632 
633    0                                        x <= -1.5
634    9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3   -1.5 < x <= -0.5
635    1/2 + 3/4 x - 1/3 x^3              -.5 < x <= .5
636    7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3     .5 < x <= 1.5
637    1                                  1.5 < x
638 
639    in Mathematica code:
640 
641    gi[x_] := Piecewise[ {
642      { 0 , x <= -1.5 },
643      { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 },
644      { 1/2 + 3/4 x - 1/3 x^3          ,  -.5 < x <= .5},
645      { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3,   .5 < x <= 1.5}
646    },1]
647 */
648 
// Piecewise-cubic approximation of the blurred step profile, evaluated so
// that the result falls from 1 (for x < -1.5) to 0 (for x > 1.5); i.e. it is
// one minus the piecewise-cubic integral of the triple box convolution.
static float gaussianIntegral(float x) {
    // Outside the kernel's support the profile is saturated.
    if (x > 1.5f) {
        return 0.0f;
    }
    if (x < -1.5f) {
        return 1.0f;
    }

    const float sq = x*x;
    const float cube = sq*x;

    float result;
    if (x > 0.5f) {
        result = 0.5625f - (cube / 6.0f - 3.0f * sq * 0.25f + 1.125f * x);
    } else if (x > -0.5f) {
        result = 0.5f - (0.75f * x - cube / 3.0f);
    } else {
        result = 0.4375f + (-cube / 6.0f - 3.0f * sq * 0.25f - 1.125f * x);
    }
    return result;
}
668 
669 /*  ComputeBlurProfile allocates and fills in an array of floating
670     point values between 0 and 255 for the profile signature of
671     a blurred half-plane with the given blur radius.  Since we're
672     going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
673     all the time, we actually fill in the profile pre-inverted
674     (already done 255-x).
675 
676     It's the responsibility of the caller to delete the
677     memory returned in profile_out.
678 */
679 
ComputeBlurProfile(SkScalar sigma)680 uint8_t* SkBlurMask::ComputeBlurProfile(SkScalar sigma) {
681     int size = SkScalarCeilToInt(6*sigma);
682 
683     int center = size >> 1;
684     uint8_t* profile = new uint8_t[size];
685 
686     float invr = 1.f/(2*sigma);
687 
688     profile[0] = 255;
689     for (int x = 1 ; x < size ; ++x) {
690         float scaled_x = (center - x - .5f) * invr;
691         float gi = gaussianIntegral(scaled_x);
692         profile[x] = 255 - (uint8_t) (255.f * gi);
693     }
694 
695     return profile;
696 }
697 
698 // TODO MAYBE: Maintain a profile cache to avoid recomputing this for
699 // commonly used radii.  Consider baking some of the most common blur radii
700 // directly in as static data?
701 
702 // Implementation adapted from Michael Herf's approach:
703 // http://stereopsis.com/shadowrect/
704 
ProfileLookup(const uint8_t * profile,int loc,int blurred_width,int sharp_width)705 uint8_t SkBlurMask::ProfileLookup(const uint8_t *profile, int loc, int blurred_width, int sharp_width) {
706     int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge?
707     int ox = dx >> 1;
708     if (ox < 0) {
709         ox = 0;
710     }
711 
712     return profile[ox];
713 }
714 
ComputeBlurredScanline(uint8_t * pixels,const uint8_t * profile,unsigned int width,SkScalar sigma)715 void SkBlurMask::ComputeBlurredScanline(uint8_t *pixels, const uint8_t *profile,
716                                         unsigned int width, SkScalar sigma) {
717 
718     unsigned int profile_size = SkScalarCeilToInt(6*sigma);
719     SkAutoTMalloc<uint8_t> horizontalScanline(width);
720 
721     unsigned int sw = width - profile_size;
722     // nearest odd number less than the profile size represents the center
723     // of the (2x scaled) profile
724     int center = ( profile_size & ~1 ) - 1;
725 
726     int w = sw - center;
727 
728     for (unsigned int x = 0 ; x < width ; ++x) {
729        if (profile_size <= sw) {
730            pixels[x] = ProfileLookup(profile, x, width, w);
731        } else {
732            float span = float(sw)/(2*sigma);
733            float giX = 1.5f - (x+.5f)/(2*sigma);
734            pixels[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span)));
735        }
736     }
737 }
738 
BlurRect(SkScalar sigma,SkMask * dst,const SkRect & src,SkBlurStyle style,SkIPoint * margin,SkMask::CreateMode createMode)739 bool SkBlurMask::BlurRect(SkScalar sigma, SkMask *dst,
740                           const SkRect &src, SkBlurStyle style,
741                           SkIPoint *margin, SkMask::CreateMode createMode) {
742     int profile_size = SkScalarCeilToInt(6*sigma);
743 
744     int pad = profile_size/2;
745     if (margin) {
746         margin->set( pad, pad );
747     }
748 
749     dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad),
750                      SkScalarRoundToInt(src.fTop - pad),
751                      SkScalarRoundToInt(src.fRight + pad),
752                      SkScalarRoundToInt(src.fBottom + pad));
753 
754     dst->fRowBytes = dst->fBounds.width();
755     dst->fFormat = SkMask::kA8_Format;
756     dst->fImage = nullptr;
757 
758     int             sw = SkScalarFloorToInt(src.width());
759     int             sh = SkScalarFloorToInt(src.height());
760 
761     if (createMode == SkMask::kJustComputeBounds_CreateMode) {
762         if (style == kInner_SkBlurStyle) {
763             dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
764                              SkScalarRoundToInt(src.fTop),
765                              SkScalarRoundToInt(src.fRight),
766                              SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
767             dst->fRowBytes = sw;
768         }
769         return true;
770     }
771 
772     std::unique_ptr<uint8_t[]> profile(ComputeBlurProfile(sigma));
773 
774     size_t dstSize = dst->computeImageSize();
775     if (0 == dstSize) {
776         return false;   // too big to allocate, abort
777     }
778 
779     uint8_t*        dp = SkMask::AllocImage(dstSize);
780 
781     dst->fImage = dp;
782 
783     int dstHeight = dst->fBounds.height();
784     int dstWidth = dst->fBounds.width();
785 
786     uint8_t *outptr = dp;
787 
788     SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth);
789     SkAutoTMalloc<uint8_t> verticalScanline(dstHeight);
790 
791     ComputeBlurredScanline(horizontalScanline, profile.get(), dstWidth, sigma);
792     ComputeBlurredScanline(verticalScanline, profile.get(), dstHeight, sigma);
793 
794     for (int y = 0 ; y < dstHeight ; ++y) {
795         for (int x = 0 ; x < dstWidth ; x++) {
796             unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], verticalScanline[y]);
797             *(outptr++) = maskval;
798         }
799     }
800 
801     if (style == kInner_SkBlurStyle) {
802         // now we allocate the "real" dst, mirror the size of src
803         size_t srcSize = (size_t)(src.width() * src.height());
804         if (0 == srcSize) {
805             return false;   // too big to allocate, abort
806         }
807         dst->fImage = SkMask::AllocImage(srcSize);
808         for (int y = 0 ; y < sh ; y++) {
809             uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad;
810             uint8_t *inner_scanline = dst->fImage + y*sw;
811             memcpy(inner_scanline, blur_scanline, sw);
812         }
813         SkMask::FreeImage(dp);
814 
815         dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
816                          SkScalarRoundToInt(src.fTop),
817                          SkScalarRoundToInt(src.fRight),
818                          SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
819         dst->fRowBytes = sw;
820 
821     } else if (style == kOuter_SkBlurStyle) {
822         for (int y = pad ; y < dstHeight-pad ; y++) {
823             uint8_t *dst_scanline = dp + y*dstWidth + pad;
824             memset(dst_scanline, 0, sw);
825         }
826     } else if (style == kSolid_SkBlurStyle) {
827         for (int y = pad ; y < dstHeight-pad ; y++) {
828             uint8_t *dst_scanline = dp + y*dstWidth + pad;
829             memset(dst_scanline, 0xff, sw);
830         }
831     }
832     // normal and solid styles are the same for analytic rect blurs, so don't
833     // need to handle solid specially.
834 
835     return true;
836 }
837 
BlurRRect(SkScalar sigma,SkMask * dst,const SkRRect & src,SkBlurStyle style,SkIPoint * margin,SkMask::CreateMode createMode)838 bool SkBlurMask::BlurRRect(SkScalar sigma, SkMask *dst,
839                            const SkRRect &src, SkBlurStyle style,
840                            SkIPoint *margin, SkMask::CreateMode createMode) {
841     // Temporary for now -- always fail, should cause caller to fall back
842     // to old path.  Plumbing just to land API and parallelize effort.
843 
844     return false;
845 }
846 
847 // The "simple" blur is a direct implementation of separable convolution with a discrete
848 // gaussian kernel.  It's "ground truth" in a sense; too slow to be used, but very
849 // useful for correctness comparisons.
850 
BlurGroundTruth(SkScalar sigma,SkMask * dst,const SkMask & src,SkBlurStyle style,SkIPoint * margin)851 bool SkBlurMask::BlurGroundTruth(SkScalar sigma, SkMask* dst, const SkMask& src,
852                                  SkBlurStyle style, SkIPoint* margin) {
853 
854     if (src.fFormat != SkMask::kA8_Format) {
855         return false;
856     }
857 
858     float variance = sigma * sigma;
859 
860     int windowSize = SkScalarCeilToInt(sigma*6);
861     // round window size up to nearest odd number
862     windowSize |= 1;
863 
864     SkAutoTMalloc<float> gaussWindow(windowSize);
865 
866     int halfWindow = windowSize >> 1;
867 
868     gaussWindow[halfWindow] = 1;
869 
870     float windowSum = 1;
871     for (int x = 1 ; x <= halfWindow ; ++x) {
872         float gaussian = expf(-x*x / (2*variance));
873         gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
874         windowSum += 2*gaussian;
875     }
876 
877     // leave the filter un-normalized for now; we will divide by the normalization
878     // sum later;
879 
880     int pad = halfWindow;
881     if (margin) {
882         margin->set( pad, pad );
883     }
884 
885     dst->fBounds = src.fBounds;
886     dst->fBounds.outset(pad, pad);
887 
888     dst->fRowBytes = dst->fBounds.width();
889     dst->fFormat = SkMask::kA8_Format;
890     dst->fImage = nullptr;
891 
892     if (src.fImage) {
893 
894         size_t dstSize = dst->computeImageSize();
895         if (0 == dstSize) {
896             return false;   // too big to allocate, abort
897         }
898 
899         int             srcWidth = src.fBounds.width();
900         int             srcHeight = src.fBounds.height();
901         int             dstWidth = dst->fBounds.width();
902 
903         const uint8_t*  srcPixels = src.fImage;
904         uint8_t*        dstPixels = SkMask::AllocImage(dstSize);
905         SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);
906 
907         // do the actual blur.  First, make a padded copy of the source.
908         // use double pad so we never have to check if we're outside anything
909 
910         int padWidth = srcWidth + 4*pad;
911         int padHeight = srcHeight;
912         int padSize = padWidth * padHeight;
913 
914         SkAutoTMalloc<uint8_t> padPixels(padSize);
915         memset(padPixels, 0, padSize);
916 
917         for (int y = 0 ; y < srcHeight; ++y) {
918             uint8_t* padptr = padPixels + y * padWidth + 2*pad;
919             const uint8_t* srcptr = srcPixels + y * srcWidth;
920             memcpy(padptr, srcptr, srcWidth);
921         }
922 
923         // blur in X, transposing the result into a temporary floating point buffer.
924         // also double-pad the intermediate result so that the second blur doesn't
925         // have to do extra conditionals.
926 
927         int tmpWidth = padHeight + 4*pad;
928         int tmpHeight = padWidth - 2*pad;
929         int tmpSize = tmpWidth * tmpHeight;
930 
931         SkAutoTMalloc<float> tmpImage(tmpSize);
932         memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));
933 
934         for (int y = 0 ; y < padHeight ; ++y) {
935             uint8_t *srcScanline = padPixels + y*padWidth;
936             for (int x = pad ; x < padWidth - pad ; ++x) {
937                 float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
938                 uint8_t *windowCenter = srcScanline + x;
939                 for (int i = -pad ; i <= pad ; ++i) {
940                     *outPixel += gaussWindow[pad+i]*windowCenter[i];
941                 }
942                 *outPixel /= windowSum;
943             }
944         }
945 
946         // blur in Y; now filling in the actual desired destination.  We have to do
947         // the transpose again; these transposes guarantee that we read memory in
948         // linear order.
949 
950         for (int y = 0 ; y < tmpHeight ; ++y) {
951             float *srcScanline = tmpImage + y*tmpWidth;
952             for (int x = pad ; x < tmpWidth - pad ; ++x) {
953                 float *windowCenter = srcScanline + x;
954                 float finalValue = 0;
955                 for (int i = -pad ; i <= pad ; ++i) {
956                     finalValue += gaussWindow[pad+i]*windowCenter[i];
957                 }
958                 finalValue /= windowSum;
959                 uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
960                 int integerPixel = int(finalValue + 0.5f);
961                 *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
962             }
963         }
964 
965         dst->fImage = dstPixels;
966         // if need be, alloc the "real" dst (same size as src) and copy/merge
967         // the blur into it (applying the src)
968         if (style == kInner_SkBlurStyle) {
969             // now we allocate the "real" dst, mirror the size of src
970             size_t srcSize = src.computeImageSize();
971             if (0 == srcSize) {
972                 return false;   // too big to allocate, abort
973             }
974             dst->fImage = SkMask::AllocImage(srcSize);
975             merge_src_with_blur(dst->fImage, src.fRowBytes,
976                 srcPixels, src.fRowBytes,
977                 dstPixels + pad*dst->fRowBytes + pad,
978                 dst->fRowBytes, srcWidth, srcHeight);
979             SkMask::FreeImage(dstPixels);
980         } else if (style != kNormal_SkBlurStyle) {
981             clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
982                 dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
983         }
984         (void)autoCall.release();
985     }
986 
987     if (style == kInner_SkBlurStyle) {
988         dst->fBounds = src.fBounds; // restore trimmed bounds
989         dst->fRowBytes = src.fRowBytes;
990     }
991 
992     return true;
993 }
994