• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  
2  /*
3   * Copyright 2006 The Android Open Source Project
4   *
5   * Use of this source code is governed by a BSD-style license that can be
6   * found in the LICENSE file.
7   */
8  
9  
10  #include "SkBlurMask.h"
11  #include "SkMath.h"
12  #include "SkTemplates.h"
13  #include "SkEndian.h"
14  
15  
16  // This constant approximates the scaling done in the software path's
17  // "high quality" mode, in SkBlurMask::Blur() (1 / sqrt(3)).
18  // IMHO, it actually should be 1:  we blur "less" than we should do
19  // according to the CSS and canvas specs, simply because Safari does the same.
20  // Firefox used to do the same too, until 4.0 where they fixed it.  So at some
21  // point we should probably get rid of these scaling constants and rebaseline
22  // all the blur tests.
23  static const SkScalar kBLUR_SIGMA_SCALE = 0.57735f;
24  
ConvertRadiusToSigma(SkScalar radius)25  SkScalar SkBlurMask::ConvertRadiusToSigma(SkScalar radius) {
26      return radius > 0 ? kBLUR_SIGMA_SCALE * radius + 0.5f : 0.0f;
27  }
28  
ConvertSigmaToRadius(SkScalar sigma)29  SkScalar SkBlurMask::ConvertSigmaToRadius(SkScalar sigma) {
30      return sigma > 0.5f ? (sigma - 0.5f) / kBLUR_SIGMA_SCALE : 0.0f;
31  }
32  
33  #define UNROLL_SEPARABLE_LOOPS
34  
35  /**
36   * This function performs a box blur in X, of the given radius.  If the
37   * "transpose" parameter is true, it will transpose the pixels on write,
38   * such that X and Y are swapped. Reads are always performed from contiguous
39   * memory in X, for speed. The destination buffer (dst) must be at least
40   * (width + leftRadius + rightRadius) * height bytes in size.
41   *
42   * This is what the inner loop looks like before unrolling, and with the two
43   * cases broken out separately (width < diameter, width >= diameter):
44   *
45   *      if (width < diameter) {
46   *          for (int x = 0; x < width; ++x) {
47   *              sum += *right++;
48   *              *dptr = (sum * scale + half) >> 24;
49   *              dptr += dst_x_stride;
50   *          }
51   *          for (int x = width; x < diameter; ++x) {
52   *              *dptr = (sum * scale + half) >> 24;
53   *              dptr += dst_x_stride;
54   *          }
55   *          for (int x = 0; x < width; ++x) {
56   *              *dptr = (sum * scale + half) >> 24;
57   *              sum -= *left++;
58   *              dptr += dst_x_stride;
59   *          }
60   *      } else {
61   *          for (int x = 0; x < diameter; ++x) {
62   *              sum += *right++;
63   *              *dptr = (sum * scale + half) >> 24;
64   *              dptr += dst_x_stride;
65   *          }
66   *          for (int x = diameter; x < width; ++x) {
67   *              sum += *right++;
68   *              *dptr = (sum * scale + half) >> 24;
69   *              sum -= *left++;
70   *              dptr += dst_x_stride;
71   *          }
72   *          for (int x = 0; x < diameter; ++x) {
73   *              *dptr = (sum * scale + half) >> 24;
74   *              sum -= *left++;
75   *              dptr += dst_x_stride;
76   *          }
77   *      }
78   */
boxBlur(const uint8_t * src,int src_y_stride,uint8_t * dst,int leftRadius,int rightRadius,int width,int height,bool transpose)79  static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
80                     int leftRadius, int rightRadius, int width, int height,
81                     bool transpose)
82  {
83      int diameter = leftRadius + rightRadius;
84      int kernelSize = diameter + 1;
85      int border = SkMin32(width, diameter);
86      uint32_t scale = (1 << 24) / kernelSize;
87      int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
88      int dst_x_stride = transpose ? height : 1;
89      int dst_y_stride = transpose ? 1 : new_width;
90      uint32_t half = 1 << 23;
91      for (int y = 0; y < height; ++y) {
92          uint32_t sum = 0;
93          uint8_t* dptr = dst + y * dst_y_stride;
94          const uint8_t* right = src + y * src_y_stride;
95          const uint8_t* left = right;
96          for (int x = 0; x < rightRadius - leftRadius; x++) {
97              *dptr = 0;
98              dptr += dst_x_stride;
99          }
100  #define LEFT_BORDER_ITER \
101              sum += *right++; \
102              *dptr = (sum * scale + half) >> 24; \
103              dptr += dst_x_stride;
104  
105          int x = 0;
106  #ifdef UNROLL_SEPARABLE_LOOPS
107          for (; x < border - 16; x += 16) {
108              LEFT_BORDER_ITER
109              LEFT_BORDER_ITER
110              LEFT_BORDER_ITER
111              LEFT_BORDER_ITER
112              LEFT_BORDER_ITER
113              LEFT_BORDER_ITER
114              LEFT_BORDER_ITER
115              LEFT_BORDER_ITER
116              LEFT_BORDER_ITER
117              LEFT_BORDER_ITER
118              LEFT_BORDER_ITER
119              LEFT_BORDER_ITER
120              LEFT_BORDER_ITER
121              LEFT_BORDER_ITER
122              LEFT_BORDER_ITER
123              LEFT_BORDER_ITER
124          }
125  #endif
126          for (; x < border; ++x) {
127              LEFT_BORDER_ITER
128          }
129  #undef LEFT_BORDER_ITER
130  #define TRIVIAL_ITER \
131              *dptr = (sum * scale + half) >> 24; \
132              dptr += dst_x_stride;
133          x = width;
134  #ifdef UNROLL_SEPARABLE_LOOPS
135          for (; x < diameter - 16; x += 16) {
136              TRIVIAL_ITER
137              TRIVIAL_ITER
138              TRIVIAL_ITER
139              TRIVIAL_ITER
140              TRIVIAL_ITER
141              TRIVIAL_ITER
142              TRIVIAL_ITER
143              TRIVIAL_ITER
144              TRIVIAL_ITER
145              TRIVIAL_ITER
146              TRIVIAL_ITER
147              TRIVIAL_ITER
148              TRIVIAL_ITER
149              TRIVIAL_ITER
150              TRIVIAL_ITER
151              TRIVIAL_ITER
152          }
153  #endif
154          for (; x < diameter; ++x) {
155              TRIVIAL_ITER
156          }
157  #undef TRIVIAL_ITER
158  #define CENTER_ITER \
159              sum += *right++; \
160              *dptr = (sum * scale + half) >> 24; \
161              sum -= *left++; \
162              dptr += dst_x_stride;
163  
164          x = diameter;
165  #ifdef UNROLL_SEPARABLE_LOOPS
166          for (; x < width - 16; x += 16) {
167              CENTER_ITER
168              CENTER_ITER
169              CENTER_ITER
170              CENTER_ITER
171              CENTER_ITER
172              CENTER_ITER
173              CENTER_ITER
174              CENTER_ITER
175              CENTER_ITER
176              CENTER_ITER
177              CENTER_ITER
178              CENTER_ITER
179              CENTER_ITER
180              CENTER_ITER
181              CENTER_ITER
182              CENTER_ITER
183          }
184  #endif
185          for (; x < width; ++x) {
186              CENTER_ITER
187          }
188  #undef CENTER_ITER
189  #define RIGHT_BORDER_ITER \
190              *dptr = (sum * scale + half) >> 24; \
191              sum -= *left++; \
192              dptr += dst_x_stride;
193  
194          x = 0;
195  #ifdef UNROLL_SEPARABLE_LOOPS
196          for (; x < border - 16; x += 16) {
197              RIGHT_BORDER_ITER
198              RIGHT_BORDER_ITER
199              RIGHT_BORDER_ITER
200              RIGHT_BORDER_ITER
201              RIGHT_BORDER_ITER
202              RIGHT_BORDER_ITER
203              RIGHT_BORDER_ITER
204              RIGHT_BORDER_ITER
205              RIGHT_BORDER_ITER
206              RIGHT_BORDER_ITER
207              RIGHT_BORDER_ITER
208              RIGHT_BORDER_ITER
209              RIGHT_BORDER_ITER
210              RIGHT_BORDER_ITER
211              RIGHT_BORDER_ITER
212              RIGHT_BORDER_ITER
213          }
214  #endif
215          for (; x < border; ++x) {
216              RIGHT_BORDER_ITER
217          }
218  #undef RIGHT_BORDER_ITER
219          for (int x = 0; x < leftRadius - rightRadius; ++x) {
220              *dptr = 0;
221              dptr += dst_x_stride;
222          }
223          SkASSERT(sum == 0);
224      }
225      return new_width;
226  }
227  
228  /**
229   * This variant of the box blur handles blurring of non-integer radii.  It
230   * keeps two running sums: an outer sum for the rounded-up kernel radius, and
231   * an inner sum for the rounded-down kernel radius.  For each pixel, it linearly
232   * interpolates between them.  In float this would be:
233   *  outer_weight * outer_sum / kernelSize +
234   *  (1.0 - outer_weight) * innerSum / (kernelSize - 2)
235   *
236   * This is what the inner loop looks like before unrolling, and with the two
237   * cases broken out separately (width < diameter, width >= diameter):
238   *
239   *      if (width < diameter) {
240   *          for (int x = 0; x < width; x++) {
241   *              inner_sum = outer_sum;
242   *              outer_sum += *right++;
243   *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
244   *              dptr += dst_x_stride;
245   *          }
246   *          for (int x = width; x < diameter; ++x) {
247   *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
248   *              dptr += dst_x_stride;
249   *          }
250   *          for (int x = 0; x < width; x++) {
251   *              inner_sum = outer_sum - *left++;
252   *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
253   *              dptr += dst_x_stride;
254   *              outer_sum = inner_sum;
255   *          }
256   *      } else {
257   *          for (int x = 0; x < diameter; x++) {
258   *              inner_sum = outer_sum;
259   *              outer_sum += *right++;
260   *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
261   *              dptr += dst_x_stride;
262   *          }
263   *          for (int x = diameter; x < width; ++x) {
264   *              inner_sum = outer_sum - *left;
265   *              outer_sum += *right++;
266   *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
267   *              dptr += dst_x_stride;
268   *              outer_sum -= *left++;
269   *          }
270   *          for (int x = 0; x < diameter; x++) {
271   *              inner_sum = outer_sum - *left++;
272   *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
273   *              dptr += dst_x_stride;
274   *              outer_sum = inner_sum;
275   *          }
276   *      }
277   *  }
278   *  return new_width;
279   */
280  
boxBlurInterp(const uint8_t * src,int src_y_stride,uint8_t * dst,int radius,int width,int height,bool transpose,uint8_t outer_weight)281  static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
282                           int radius, int width, int height,
283                           bool transpose, uint8_t outer_weight)
284  {
285      int diameter = radius * 2;
286      int kernelSize = diameter + 1;
287      int border = SkMin32(width, diameter);
288      int inner_weight = 255 - outer_weight;
289      outer_weight += outer_weight >> 7;
290      inner_weight += inner_weight >> 7;
291      uint32_t outer_scale = (outer_weight << 16) / kernelSize;
292      uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
293      uint32_t half = 1 << 23;
294      int new_width = width + diameter;
295      int dst_x_stride = transpose ? height : 1;
296      int dst_y_stride = transpose ? 1 : new_width;
297      for (int y = 0; y < height; ++y) {
298          uint32_t outer_sum = 0, inner_sum = 0;
299          uint8_t* dptr = dst + y * dst_y_stride;
300          const uint8_t* right = src + y * src_y_stride;
301          const uint8_t* left = right;
302          int x = 0;
303  
304  #define LEFT_BORDER_ITER \
305              inner_sum = outer_sum; \
306              outer_sum += *right++; \
307              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
308              dptr += dst_x_stride;
309  
310  #ifdef UNROLL_SEPARABLE_LOOPS
311          for (;x < border - 16; x += 16) {
312              LEFT_BORDER_ITER
313              LEFT_BORDER_ITER
314              LEFT_BORDER_ITER
315              LEFT_BORDER_ITER
316              LEFT_BORDER_ITER
317              LEFT_BORDER_ITER
318              LEFT_BORDER_ITER
319              LEFT_BORDER_ITER
320              LEFT_BORDER_ITER
321              LEFT_BORDER_ITER
322              LEFT_BORDER_ITER
323              LEFT_BORDER_ITER
324              LEFT_BORDER_ITER
325              LEFT_BORDER_ITER
326              LEFT_BORDER_ITER
327              LEFT_BORDER_ITER
328          }
329  #endif
330  
331          for (;x < border; ++x) {
332              LEFT_BORDER_ITER
333          }
334  #undef LEFT_BORDER_ITER
335          for (int x = width; x < diameter; ++x) {
336              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
337              dptr += dst_x_stride;
338          }
339          x = diameter;
340  
341  #define CENTER_ITER \
342              inner_sum = outer_sum - *left; \
343              outer_sum += *right++; \
344              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
345              dptr += dst_x_stride; \
346              outer_sum -= *left++;
347  
348  #ifdef UNROLL_SEPARABLE_LOOPS
349          for (; x < width - 16; x += 16) {
350              CENTER_ITER
351              CENTER_ITER
352              CENTER_ITER
353              CENTER_ITER
354              CENTER_ITER
355              CENTER_ITER
356              CENTER_ITER
357              CENTER_ITER
358              CENTER_ITER
359              CENTER_ITER
360              CENTER_ITER
361              CENTER_ITER
362              CENTER_ITER
363              CENTER_ITER
364              CENTER_ITER
365              CENTER_ITER
366          }
367  #endif
368          for (; x < width; ++x) {
369              CENTER_ITER
370          }
371  #undef CENTER_ITER
372  
373          #define RIGHT_BORDER_ITER \
374              inner_sum = outer_sum - *left++; \
375              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
376              dptr += dst_x_stride; \
377              outer_sum = inner_sum;
378  
379          x = 0;
380  #ifdef UNROLL_SEPARABLE_LOOPS
381          for (; x < border - 16; x += 16) {
382              RIGHT_BORDER_ITER
383              RIGHT_BORDER_ITER
384              RIGHT_BORDER_ITER
385              RIGHT_BORDER_ITER
386              RIGHT_BORDER_ITER
387              RIGHT_BORDER_ITER
388              RIGHT_BORDER_ITER
389              RIGHT_BORDER_ITER
390              RIGHT_BORDER_ITER
391              RIGHT_BORDER_ITER
392              RIGHT_BORDER_ITER
393              RIGHT_BORDER_ITER
394              RIGHT_BORDER_ITER
395              RIGHT_BORDER_ITER
396              RIGHT_BORDER_ITER
397              RIGHT_BORDER_ITER
398          }
399  #endif
400          for (; x < border; ++x) {
401              RIGHT_BORDER_ITER
402          }
403  #undef RIGHT_BORDER_ITER
404          SkASSERT(outer_sum == 0 && inner_sum == 0);
405      }
406      return new_width;
407  }
408  
get_adjusted_radii(SkScalar passRadius,int * loRadius,int * hiRadius)409  static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
410  {
411      *loRadius = *hiRadius = SkScalarCeilToInt(passRadius);
412      if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) {
413          *loRadius = *hiRadius - 1;
414      }
415  }
416  
417  #include "SkColorPriv.h"
418  
merge_src_with_blur(uint8_t dst[],int dstRB,const uint8_t src[],int srcRB,const uint8_t blur[],int blurRB,int sw,int sh)419  static void merge_src_with_blur(uint8_t dst[], int dstRB,
420                                  const uint8_t src[], int srcRB,
421                                  const uint8_t blur[], int blurRB,
422                                  int sw, int sh) {
423      dstRB -= sw;
424      srcRB -= sw;
425      blurRB -= sw;
426      while (--sh >= 0) {
427          for (int x = sw - 1; x >= 0; --x) {
428              *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
429              dst += 1;
430              src += 1;
431              blur += 1;
432          }
433          dst += dstRB;
434          src += srcRB;
435          blur += blurRB;
436      }
437  }
438  
clamp_with_orig(uint8_t dst[],int dstRowBytes,const uint8_t src[],int srcRowBytes,int sw,int sh,SkBlurStyle style)439  static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
440                              const uint8_t src[], int srcRowBytes,
441                              int sw, int sh,
442                              SkBlurStyle style) {
443      int x;
444      while (--sh >= 0) {
445          switch (style) {
446          case kSolid_SkBlurStyle:
447              for (x = sw - 1; x >= 0; --x) {
448                  int s = *src;
449                  int d = *dst;
450                  *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
451                  dst += 1;
452                  src += 1;
453              }
454              break;
455          case kOuter_SkBlurStyle:
456              for (x = sw - 1; x >= 0; --x) {
457                  if (*src) {
458                      *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
459                  }
460                  dst += 1;
461                  src += 1;
462              }
463              break;
464          default:
465              SkDEBUGFAIL("Unexpected blur style here");
466              break;
467          }
468          dst += dstRowBytes - sw;
469          src += srcRowBytes - sw;
470      }
471  }
472  
473  ///////////////////////////////////////////////////////////////////////////////
474  
475  // we use a local function to wrap the class static method to work around
476  // a bug in gcc98
477  void SkMask_FreeImage(uint8_t* image);
SkMask_FreeImage(uint8_t * image)478  void SkMask_FreeImage(uint8_t* image) {
479      SkMask::FreeImage(image);
480  }
481  
BoxBlur(SkMask * dst,const SkMask & src,SkScalar sigma,SkBlurStyle style,SkBlurQuality quality,SkIPoint * margin,bool force_quality)482  bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src,
483                           SkScalar sigma, SkBlurStyle style, SkBlurQuality quality,
484                           SkIPoint* margin, bool force_quality) {
485  
486      if (src.fFormat != SkMask::kA8_Format) {
487          return false;
488      }
489  
490      // Force high quality off for small radii (performance)
491      if (!force_quality && sigma <= SkIntToScalar(2)) {
492          quality = kLow_SkBlurQuality;
493      }
494  
495      SkScalar passRadius;
496      if (kHigh_SkBlurQuality == quality) {
497          // For the high quality path the 3 pass box blur kernel width is
498          // 6*rad+1 while the full Gaussian width is 6*sigma.
499          passRadius = sigma - (1/6.0f);
500      } else {
501          // For the low quality path we only attempt to cover 3*sigma of the
502          // Gaussian blur area (1.5*sigma on each side). The single pass box
503          // blur's kernel size is 2*rad+1.
504          passRadius = 1.5f*sigma - 0.5f;
505      }
506  
507      // highQuality: use three box blur passes as a cheap way
508      // to approximate a Gaussian blur
509      int passCount = (kHigh_SkBlurQuality == quality) ? 3 : 1;
510  
511      int rx = SkScalarCeilToInt(passRadius);
512      int outerWeight = 255 - SkScalarRoundToInt((SkIntToScalar(rx) - passRadius) * 255);
513  
514      SkASSERT(rx >= 0);
515      SkASSERT((unsigned)outerWeight <= 255);
516      if (rx <= 0) {
517          return false;
518      }
519  
520      int ry = rx;    // only do square blur for now
521  
522      int padx = passCount * rx;
523      int pady = passCount * ry;
524  
525      if (margin) {
526          margin->set(padx, pady);
527      }
528      dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
529                       src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
530  
531      dst->fRowBytes = dst->fBounds.width();
532      dst->fFormat = SkMask::kA8_Format;
533      dst->fImage = NULL;
534  
535      if (src.fImage) {
536          size_t dstSize = dst->computeImageSize();
537          if (0 == dstSize) {
538              return false;   // too big to allocate, abort
539          }
540  
541          int             sw = src.fBounds.width();
542          int             sh = src.fBounds.height();
543          const uint8_t*  sp = src.fImage;
544          uint8_t*        dp = SkMask::AllocImage(dstSize);
545          SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
546  
547          // build the blurry destination
548          SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
549          uint8_t*                tp = tmpBuffer.get();
550          int w = sw, h = sh;
551  
552          if (outerWeight == 255) {
553              int loRadius, hiRadius;
554              get_adjusted_radii(passRadius, &loRadius, &hiRadius);
555              if (kHigh_SkBlurQuality == quality) {
556                  // Do three X blurs, with a transpose on the final one.
557                  w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
558                  w = boxBlur(tp, w,             dp, hiRadius, loRadius, w, h, false);
559                  w = boxBlur(dp, w,             tp, hiRadius, hiRadius, w, h, true);
560                  // Do three Y blurs, with a transpose on the final one.
561                  h = boxBlur(tp, h,             dp, loRadius, hiRadius, h, w, false);
562                  h = boxBlur(dp, h,             tp, hiRadius, loRadius, h, w, false);
563                  h = boxBlur(tp, h,             dp, hiRadius, hiRadius, h, w, true);
564              } else {
565                  w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
566                  h = boxBlur(tp, h,             dp, ry, ry, h, w, true);
567              }
568          } else {
569              if (kHigh_SkBlurQuality == quality) {
570                  // Do three X blurs, with a transpose on the final one.
571                  w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
572                  w = boxBlurInterp(tp, w,             dp, rx, w, h, false, outerWeight);
573                  w = boxBlurInterp(dp, w,             tp, rx, w, h, true, outerWeight);
574                  // Do three Y blurs, with a transpose on the final one.
575                  h = boxBlurInterp(tp, h,             dp, ry, h, w, false, outerWeight);
576                  h = boxBlurInterp(dp, h,             tp, ry, h, w, false, outerWeight);
577                  h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
578              } else {
579                  w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
580                  h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
581              }
582          }
583  
584          dst->fImage = dp;
585          // if need be, alloc the "real" dst (same size as src) and copy/merge
586          // the blur into it (applying the src)
587          if (style == kInner_SkBlurStyle) {
588              // now we allocate the "real" dst, mirror the size of src
589              size_t srcSize = src.computeImageSize();
590              if (0 == srcSize) {
591                  return false;   // too big to allocate, abort
592              }
593              dst->fImage = SkMask::AllocImage(srcSize);
594              merge_src_with_blur(dst->fImage, src.fRowBytes,
595                                  sp, src.fRowBytes,
596                                  dp + passCount * (rx + ry * dst->fRowBytes),
597                                  dst->fRowBytes, sw, sh);
598              SkMask::FreeImage(dp);
599          } else if (style != kNormal_SkBlurStyle) {
600              clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
601                              dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
602          }
603          (void)autoCall.detach();
604      }
605  
606      if (style == kInner_SkBlurStyle) {
607          dst->fBounds = src.fBounds; // restore trimmed bounds
608          dst->fRowBytes = src.fRowBytes;
609      }
610  
611      return true;
612  }
613  
614  /* Convolving a box with itself three times results in a piecewise
615     quadratic function:
616  
617     0                              x <= -1.5
618     9/8 + 3/2 x + 1/2 x^2   -1.5 < x <= -.5
619     3/4 - x^2                -.5 < x <= .5
620     9/8 - 3/2 x + 1/2 x^2    0.5 < x <= 1.5
621     0                        1.5 < x
622  
623     Mathematica:
624  
625     g[x_] := Piecewise [ {
626       {9/8 + 3/2 x + 1/2 x^2 ,  -1.5 < x <= -.5},
627       {3/4 - x^2             ,   -.5 < x <= .5},
628       {9/8 - 3/2 x + 1/2 x^2 ,   0.5 < x <= 1.5}
629     }, 0]
630  
631     To get the profile curve of the blurred step function at the rectangle
632     edge, we evaluate the indefinite integral, which is piecewise cubic:
633  
634     0                                        x <= -1.5
635     9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3   -1.5 < x <= -0.5
636     1/2 + 3/4 x - 1/3 x^3              -.5 < x <= .5
637     7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3     .5 < x <= 1.5
638     1                                  1.5 < x
639  
640     in Mathematica code:
641  
642     gi[x_] := Piecewise[ {
643       { 0 , x <= -1.5 },
644       { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 },
645       { 1/2 + 3/4 x - 1/3 x^3          ,  -.5 < x <= .5},
646       { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3,   .5 < x <= 1.5}
647     },1]
648  */
649  
gaussianIntegral(float x)650  static float gaussianIntegral(float x) {
651      if (x > 1.5f) {
652          return 0.0f;
653      }
654      if (x < -1.5f) {
655          return 1.0f;
656      }
657  
658      float x2 = x*x;
659      float x3 = x2*x;
660  
661      if ( x > 0.5f ) {
662          return 0.5625f - (x3 / 6.0f - 3.0f * x2 * 0.25f + 1.125f * x);
663      }
664      if ( x > -0.5f ) {
665          return 0.5f - (0.75f * x - x3 / 3.0f);
666      }
667      return 0.4375f + (-x3 / 6.0f - 3.0f * x2 * 0.25f - 1.125f * x);
668  }
669  
670  /*  ComputeBlurProfile allocates and fills in an array of floating
671      point values between 0 and 255 for the profile signature of
672      a blurred half-plane with the given blur radius.  Since we're
673      going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
674      all the time, we actually fill in the profile pre-inverted
675      (already done 255-x).
676  
677      It's the responsibility of the caller to delete the
678      memory returned in profile_out.
679  */
680  
ComputeBlurProfile(SkScalar sigma,uint8_t ** profile_out)681  void SkBlurMask::ComputeBlurProfile(SkScalar sigma, uint8_t **profile_out) {
682      int size = SkScalarCeilToInt(6*sigma);
683  
684      int center = size >> 1;
685      uint8_t *profile = SkNEW_ARRAY(uint8_t, size);
686  
687      float invr = 1.f/(2*sigma);
688  
689      profile[0] = 255;
690      for (int x = 1 ; x < size ; ++x) {
691          float scaled_x = (center - x - .5f) * invr;
692          float gi = gaussianIntegral(scaled_x);
693          profile[x] = 255 - (uint8_t) (255.f * gi);
694      }
695  
696      *profile_out = profile;
697  }
698  
699  // TODO MAYBE: Maintain a profile cache to avoid recomputing this for
700  // commonly used radii.  Consider baking some of the most common blur radii
701  // directly in as static data?
702  
703  // Implementation adapted from Michael Herf's approach:
704  // http://stereopsis.com/shadowrect/
705  
ProfileLookup(const uint8_t * profile,int loc,int blurred_width,int sharp_width)706  uint8_t SkBlurMask::ProfileLookup(const uint8_t *profile, int loc, int blurred_width, int sharp_width) {
707      int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge?
708      int ox = dx >> 1;
709      if (ox < 0) {
710          ox = 0;
711      }
712  
713      return profile[ox];
714  }
715  
ComputeBlurredScanline(uint8_t * pixels,const uint8_t * profile,unsigned int width,SkScalar sigma)716  void SkBlurMask::ComputeBlurredScanline(uint8_t *pixels, const uint8_t *profile,
717                                          unsigned int width, SkScalar sigma) {
718  
719      unsigned int profile_size = SkScalarCeilToInt(6*sigma);
720      SkAutoTMalloc<uint8_t> horizontalScanline(width);
721  
722      unsigned int sw = width - profile_size;
723      // nearest odd number less than the profile size represents the center
724      // of the (2x scaled) profile
725      int center = ( profile_size & ~1 ) - 1;
726  
727      int w = sw - center;
728  
729      for (unsigned int x = 0 ; x < width ; ++x) {
730         if (profile_size <= sw) {
731             pixels[x] = ProfileLookup(profile, x, width, w);
732         } else {
733             float span = float(sw)/(2*sigma);
734             float giX = 1.5f - (x+.5f)/(2*sigma);
735             pixels[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span)));
736         }
737      }
738  }
739  
BlurRect(SkScalar sigma,SkMask * dst,const SkRect & src,SkBlurStyle style,SkIPoint * margin,SkMask::CreateMode createMode)740  bool SkBlurMask::BlurRect(SkScalar sigma, SkMask *dst,
741                            const SkRect &src, SkBlurStyle style,
742                            SkIPoint *margin, SkMask::CreateMode createMode) {
743      int profile_size = SkScalarCeilToInt(6*sigma);
744  
745      int pad = profile_size/2;
746      if (margin) {
747          margin->set( pad, pad );
748      }
749  
750      dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad),
751                       SkScalarRoundToInt(src.fTop - pad),
752                       SkScalarRoundToInt(src.fRight + pad),
753                       SkScalarRoundToInt(src.fBottom + pad));
754  
755      dst->fRowBytes = dst->fBounds.width();
756      dst->fFormat = SkMask::kA8_Format;
757      dst->fImage = NULL;
758  
759      int             sw = SkScalarFloorToInt(src.width());
760      int             sh = SkScalarFloorToInt(src.height());
761  
762      if (createMode == SkMask::kJustComputeBounds_CreateMode) {
763          if (style == kInner_SkBlurStyle) {
764              dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
765                               SkScalarRoundToInt(src.fTop),
766                               SkScalarRoundToInt(src.fRight),
767                               SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
768              dst->fRowBytes = sw;
769          }
770          return true;
771      }
772      uint8_t *profile = NULL;
773  
774      ComputeBlurProfile(sigma, &profile);
775      SkAutoTDeleteArray<uint8_t> ada(profile);
776  
777      size_t dstSize = dst->computeImageSize();
778      if (0 == dstSize) {
779          return false;   // too big to allocate, abort
780      }
781  
782      uint8_t*        dp = SkMask::AllocImage(dstSize);
783  
784      dst->fImage = dp;
785  
786      int dstHeight = dst->fBounds.height();
787      int dstWidth = dst->fBounds.width();
788  
789      uint8_t *outptr = dp;
790  
791      SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth);
792      SkAutoTMalloc<uint8_t> verticalScanline(dstHeight);
793  
794      ComputeBlurredScanline(horizontalScanline, profile, dstWidth, sigma);
795      ComputeBlurredScanline(verticalScanline, profile, dstHeight, sigma);
796  
797      for (int y = 0 ; y < dstHeight ; ++y) {
798          for (int x = 0 ; x < dstWidth ; x++) {
799              unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], verticalScanline[y]);
800              *(outptr++) = maskval;
801          }
802      }
803  
804      if (style == kInner_SkBlurStyle) {
805          // now we allocate the "real" dst, mirror the size of src
806          size_t srcSize = (size_t)(src.width() * src.height());
807          if (0 == srcSize) {
808              return false;   // too big to allocate, abort
809          }
810          dst->fImage = SkMask::AllocImage(srcSize);
811          for (int y = 0 ; y < sh ; y++) {
812              uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad;
813              uint8_t *inner_scanline = dst->fImage + y*sw;
814              memcpy(inner_scanline, blur_scanline, sw);
815          }
816          SkMask::FreeImage(dp);
817  
818          dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
819                           SkScalarRoundToInt(src.fTop),
820                           SkScalarRoundToInt(src.fRight),
821                           SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
822          dst->fRowBytes = sw;
823  
824      } else if (style == kOuter_SkBlurStyle) {
825          for (int y = pad ; y < dstHeight-pad ; y++) {
826              uint8_t *dst_scanline = dp + y*dstWidth + pad;
827              memset(dst_scanline, 0, sw);
828          }
829      } else if (style == kSolid_SkBlurStyle) {
830          for (int y = pad ; y < dstHeight-pad ; y++) {
831              uint8_t *dst_scanline = dp + y*dstWidth + pad;
832              memset(dst_scanline, 0xff, sw);
833          }
834      }
835      // normal and solid styles are the same for analytic rect blurs, so don't
836      // need to handle solid specially.
837  
838      return true;
839  }
840  
BlurRRect(SkScalar sigma,SkMask * dst,const SkRRect & src,SkBlurStyle style,SkIPoint * margin,SkMask::CreateMode createMode)841  bool SkBlurMask::BlurRRect(SkScalar sigma, SkMask *dst,
842                             const SkRRect &src, SkBlurStyle style,
843                             SkIPoint *margin, SkMask::CreateMode createMode) {
844      // Temporary for now -- always fail, should cause caller to fall back
845      // to old path.  Plumbing just to land API and parallelize effort.
846  
847      return false;
848  }
849  
850  // The "simple" blur is a direct implementation of separable convolution with a discrete
851  // gaussian kernel.  It's "ground truth" in a sense; too slow to be used, but very
852  // useful for correctness comparisons.
853  
BlurGroundTruth(SkScalar sigma,SkMask * dst,const SkMask & src,SkBlurStyle style,SkIPoint * margin)854  bool SkBlurMask::BlurGroundTruth(SkScalar sigma, SkMask* dst, const SkMask& src,
855                                   SkBlurStyle style, SkIPoint* margin) {
856  
857      if (src.fFormat != SkMask::kA8_Format) {
858          return false;
859      }
860  
861      float variance = sigma * sigma;
862  
863      int windowSize = SkScalarCeilToInt(sigma*6);
864      // round window size up to nearest odd number
865      windowSize |= 1;
866  
867      SkAutoTMalloc<float> gaussWindow(windowSize);
868  
869      int halfWindow = windowSize >> 1;
870  
871      gaussWindow[halfWindow] = 1;
872  
873      float windowSum = 1;
874      for (int x = 1 ; x <= halfWindow ; ++x) {
875          float gaussian = expf(-x*x / (2*variance));
876          gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
877          windowSum += 2*gaussian;
878      }
879  
880      // leave the filter un-normalized for now; we will divide by the normalization
881      // sum later;
882  
883      int pad = halfWindow;
884      if (margin) {
885          margin->set( pad, pad );
886      }
887  
888      dst->fBounds = src.fBounds;
889      dst->fBounds.outset(pad, pad);
890  
891      dst->fRowBytes = dst->fBounds.width();
892      dst->fFormat = SkMask::kA8_Format;
893      dst->fImage = NULL;
894  
895      if (src.fImage) {
896  
897          size_t dstSize = dst->computeImageSize();
898          if (0 == dstSize) {
899              return false;   // too big to allocate, abort
900          }
901  
902          int             srcWidth = src.fBounds.width();
903          int             srcHeight = src.fBounds.height();
904          int             dstWidth = dst->fBounds.width();
905  
906          const uint8_t*  srcPixels = src.fImage;
907          uint8_t*        dstPixels = SkMask::AllocImage(dstSize);
908          SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);
909  
910          // do the actual blur.  First, make a padded copy of the source.
911          // use double pad so we never have to check if we're outside anything
912  
913          int padWidth = srcWidth + 4*pad;
914          int padHeight = srcHeight;
915          int padSize = padWidth * padHeight;
916  
917          SkAutoTMalloc<uint8_t> padPixels(padSize);
918          memset(padPixels, 0, padSize);
919  
920          for (int y = 0 ; y < srcHeight; ++y) {
921              uint8_t* padptr = padPixels + y * padWidth + 2*pad;
922              const uint8_t* srcptr = srcPixels + y * srcWidth;
923              memcpy(padptr, srcptr, srcWidth);
924          }
925  
926          // blur in X, transposing the result into a temporary floating point buffer.
927          // also double-pad the intermediate result so that the second blur doesn't
928          // have to do extra conditionals.
929  
930          int tmpWidth = padHeight + 4*pad;
931          int tmpHeight = padWidth - 2*pad;
932          int tmpSize = tmpWidth * tmpHeight;
933  
934          SkAutoTMalloc<float> tmpImage(tmpSize);
935          memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));
936  
937          for (int y = 0 ; y < padHeight ; ++y) {
938              uint8_t *srcScanline = padPixels + y*padWidth;
939              for (int x = pad ; x < padWidth - pad ; ++x) {
940                  float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
941                  uint8_t *windowCenter = srcScanline + x;
942                  for (int i = -pad ; i <= pad ; ++i) {
943                      *outPixel += gaussWindow[pad+i]*windowCenter[i];
944                  }
945                  *outPixel /= windowSum;
946              }
947          }
948  
949          // blur in Y; now filling in the actual desired destination.  We have to do
950          // the transpose again; these transposes guarantee that we read memory in
951          // linear order.
952  
953          for (int y = 0 ; y < tmpHeight ; ++y) {
954              float *srcScanline = tmpImage + y*tmpWidth;
955              for (int x = pad ; x < tmpWidth - pad ; ++x) {
956                  float *windowCenter = srcScanline + x;
957                  float finalValue = 0;
958                  for (int i = -pad ; i <= pad ; ++i) {
959                      finalValue += gaussWindow[pad+i]*windowCenter[i];
960                  }
961                  finalValue /= windowSum;
962                  uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
963                  int integerPixel = int(finalValue + 0.5f);
964                  *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
965              }
966          }
967  
968          dst->fImage = dstPixels;
969          // if need be, alloc the "real" dst (same size as src) and copy/merge
970          // the blur into it (applying the src)
971          if (style == kInner_SkBlurStyle) {
972              // now we allocate the "real" dst, mirror the size of src
973              size_t srcSize = src.computeImageSize();
974              if (0 == srcSize) {
975                  return false;   // too big to allocate, abort
976              }
977              dst->fImage = SkMask::AllocImage(srcSize);
978              merge_src_with_blur(dst->fImage, src.fRowBytes,
979                  srcPixels, src.fRowBytes,
980                  dstPixels + pad*dst->fRowBytes + pad,
981                  dst->fRowBytes, srcWidth, srcHeight);
982              SkMask::FreeImage(dstPixels);
983          } else if (style != kNormal_SkBlurStyle) {
984              clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
985                  dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
986          }
987          (void)autoCall.detach();
988      }
989  
990      if (style == kInner_SkBlurStyle) {
991          dst->fBounds = src.fBounds; // restore trimmed bounds
992          dst->fRowBytes = src.fRowBytes;
993      }
994  
995      return true;
996  }
997