1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED
9 #define SkLinearBitmapPipeline_sampler_DEFINED
10 
11 #include <tuple>
12 
13 #include "SkAutoMalloc.h"
14 #include "SkColor.h"
15 #include "SkColorPriv.h"
16 #include "SkFixed.h"  // for SkFixed1 only. Don't use SkFixed in this file.
17 #include "SkHalf.h"
18 #include "SkLinearBitmapPipeline_core.h"
19 #include "SkNx.h"
20 #include "SkPM4fPriv.h"
21 
22 namespace {
23 // Explaination of the math:
24 //              1 - x      x
25 //           +--------+--------+
26 //           |        |        |
27 //  1 - y    |  px00  |  px10  |
28 //           |        |        |
29 //           +--------+--------+
30 //           |        |        |
31 //    y      |  px01  |  px11  |
32 //           |        |        |
33 //           +--------+--------+
34 //
35 //
36 // Given a pixelxy each is multiplied by a different factor derived from the fractional part of x
37 // and y:
38 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
39 // * px10 -> x(1 - y) = x - xy
40 // * px01 -> (1 - x)y = y - xy
41 // * px11 -> xy
42 // So x * y is calculated first and then used to calculate all the other factors.
bilerp4(Sk4s xs,Sk4s ys,Sk4f px00,Sk4f px10,Sk4f px01,Sk4f px11)43 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
44                                                     Sk4f px01, Sk4f px11) {
45     // Calculate fractional xs and ys.
46     Sk4s fxs = xs - xs.floor();
47     Sk4s fys = ys - ys.floor();
48     Sk4s fxys{fxs * fys};
49     Sk4f sum = px11 * fxys;
50     sum = sum + px01 * (fys - fxys);
51     sum = sum + px10 * (fxs - fxys);
52     sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
53     return sum;
54 }
55 
56 ////////////////////////////////////////////////////////////////////////////////////////////////////
57 // PixelGetter is the lowest level interface to the source data. There is a PixelConverter for each
58 // of the different SkColorTypes.
59 template <SkColorType, SkGammaType> class PixelConverter;
60 
61 // Alpha handling:
62 //   The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate
63 // the entire bitmap. So, the tint color is given an alpha of 1.0 so that the later alpha can
64 // modulate this color later.
65 template <>
66 class PixelConverter<kAlpha_8_SkColorType, kLinear_SkGammaType> {
67 public:
68     using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap,SkColor tintColor)69     PixelConverter(const SkPixmap& srcPixmap, SkColor tintColor) {
70         fTintColor = SkColor4f::FromColor(tintColor);
71         fTintColor.fA = 1.0f;
72     }
73 
toSk4f(const Element pixel)74     Sk4f toSk4f(const Element pixel) const {
75         return Sk4f::Load(&fTintColor) * (pixel * (1.0f/255.0f));
76     }
77 
78 private:
79     SkColor4f fTintColor;
80 };
81 
82 template <SkGammaType gammaType>
pmcolor_to_rgba(SkPMColor pixel)83 static inline Sk4f pmcolor_to_rgba(SkPMColor pixel) {
84     return swizzle_rb_if_bgra(
85             (gammaType == kSRGB_SkGammaType) ? Sk4f_fromS32(pixel)
86                                              : Sk4f_fromL32(pixel));
87 }
88 
89 template <SkGammaType gammaType>
90 class PixelConverter<kRGB_565_SkColorType, gammaType> {
91 public:
92     using Element = uint16_t;
PixelConverter(const SkPixmap & srcPixmap)93     PixelConverter(const SkPixmap& srcPixmap) { }
94 
toSk4f(Element pixel)95     Sk4f toSk4f(Element pixel) const {
96         return pmcolor_to_rgba<gammaType>(SkPixel16ToPixel32(pixel));
97     }
98 };
99 
100 template <SkGammaType gammaType>
101 class PixelConverter<kARGB_4444_SkColorType, gammaType> {
102 public:
103     using Element = uint16_t;
PixelConverter(const SkPixmap & srcPixmap)104     PixelConverter(const SkPixmap& srcPixmap) { }
105 
toSk4f(Element pixel)106     Sk4f toSk4f(Element pixel) const {
107         return pmcolor_to_rgba<gammaType>(SkPixel4444ToPixel32(pixel));
108     }
109 };
110 
111 template <SkGammaType gammaType>
112 class PixelConverter<kRGBA_8888_SkColorType, gammaType> {
113 public:
114     using Element = uint32_t;
PixelConverter(const SkPixmap & srcPixmap)115     PixelConverter(const SkPixmap& srcPixmap) { }
116 
toSk4f(Element pixel)117     Sk4f toSk4f(Element pixel) const {
118         return gammaType == kSRGB_SkGammaType
119                ? Sk4f_fromS32(pixel)
120                : Sk4f_fromL32(pixel);
121     }
122 };
123 
124 template <SkGammaType gammaType>
125 class PixelConverter<kBGRA_8888_SkColorType, gammaType> {
126 public:
127     using Element = uint32_t;
PixelConverter(const SkPixmap & srcPixmap)128     PixelConverter(const SkPixmap& srcPixmap) { }
129 
toSk4f(Element pixel)130     Sk4f toSk4f(Element pixel) const {
131         return swizzle_rb(
132                    gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_fromL32(pixel));
133     }
134 };
135 
136 template <SkGammaType gammaType>
137 class PixelConverter<kIndex_8_SkColorType, gammaType> {
138 public:
139     using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap)140     PixelConverter(const SkPixmap& srcPixmap)
141     : fColorTableSize(srcPixmap.ctable()->count()){
142         SkColorTable* skColorTable = srcPixmap.ctable();
143         SkASSERT(skColorTable != nullptr);
144 
145         fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
146         for (int i = 0; i < fColorTableSize; i++) {
147             fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]);
148         }
149     }
150 
PixelConverter(const PixelConverter & strategy)151     PixelConverter(const PixelConverter& strategy)
152     : fColorTableSize{strategy.fColorTableSize}{
153         fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
154         for (int i = 0; i < fColorTableSize; i++) {
155             fColorTable[i] = strategy.fColorTable[i];
156         }
157     }
158 
toSk4f(Element index)159     Sk4f toSk4f(Element index) const {
160         return fColorTable[index];
161     }
162 
163 private:
164     static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12;
165     const int           fColorTableSize;
166     SkAutoMalloc        fColorTableStorage{kColorTableSize};
167     Sk4f*               fColorTable;
168 };
169 
170 template <SkGammaType gammaType>
171 class PixelConverter<kGray_8_SkColorType, gammaType> {
172 public:
173     using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap)174     PixelConverter(const SkPixmap& srcPixmap) { }
175 
toSk4f(Element pixel)176     Sk4f toSk4f(Element pixel) const {
177         float gray = (gammaType == kSRGB_SkGammaType)
178             ? sk_linear_from_srgb[pixel]
179             : pixel * (1/255.0f);
180         return {gray, gray, gray, 1.0f};
181     }
182 };
183 
184 template <>
185 class PixelConverter<kRGBA_F16_SkColorType, kLinear_SkGammaType> {
186 public:
187     using Element = uint64_t;
PixelConverter(const SkPixmap & srcPixmap)188     PixelConverter(const SkPixmap& srcPixmap) { }
189 
toSk4f(const Element pixel)190     Sk4f toSk4f(const Element pixel) const {
191         return SkHalfToFloat_finite_ftz(pixel);
192     }
193 };
194 
195 class PixelAccessorShim {
196 public:
PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface * accessor)197     explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* accessor)
198         : fPixelAccessor(accessor) { }
199 
getFewPixels(int n,Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2)200     void SK_VECTORCALL getFewPixels(
201         int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
202         fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2);
203     }
204 
get4Pixels(Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)205     void SK_VECTORCALL get4Pixels(
206         Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
207         fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3);
208     }
209 
get4Pixels(const void * src,int index,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)210     void get4Pixels(
211         const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
212         fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3);
213     }
214 
getPixelFromRow(const void * row,int index)215     Sk4f getPixelFromRow(const void* row, int index) const {
216         return fPixelAccessor->getPixelFromRow(row, index);
217     }
218 
getPixelAt(int index)219     Sk4f getPixelAt(int index) const {
220         return fPixelAccessor->getPixelAt(index);
221     }
222 
row(int y)223     const void* row(int y) const {
224         return fPixelAccessor->row(y);
225     }
226 
227 private:
228     SkLinearBitmapPipeline::PixelAccessorInterface* const fPixelAccessor;
229 };
230 
231 ////////////////////////////////////////////////////////////////////////////////////////////////////
232 // PixelAccessor handles all the same plumbing for all the PixelGetters.
233 template <SkColorType colorType, SkGammaType gammaType>
234 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterface {
235     using Element = typename PixelConverter<colorType, gammaType>::Element;
236 public:
237     template <typename... Args>
PixelAccessor(const SkPixmap & srcPixmap,Args &&...args)238     PixelAccessor(const SkPixmap& srcPixmap, Args&&... args)
239         : fSrc{static_cast<const Element*>(srcPixmap.addr())}
240         , fWidth{srcPixmap.rowBytesAsPixels()}
241         , fConverter{srcPixmap, std::move<Args>(args)...} { }
242 
getFewPixels(int n,Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2)243     void SK_VECTORCALL getFewPixels (
244         int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
245         Sk4i bufferLoc = ys * fWidth + xs;
246         switch (n) {
247             case 3:
248                 *px2 = this->getPixelAt(bufferLoc[2]);
249             case 2:
250                 *px1 = this->getPixelAt(bufferLoc[1]);
251             case 1:
252                 *px0 = this->getPixelAt(bufferLoc[0]);
253             default:
254                 break;
255         }
256     }
257 
get4Pixels(Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)258     void SK_VECTORCALL get4Pixels(
259         Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
260         Sk4i bufferLoc = ys * fWidth + xs;
261         *px0 = this->getPixelAt(bufferLoc[0]);
262         *px1 = this->getPixelAt(bufferLoc[1]);
263         *px2 = this->getPixelAt(bufferLoc[2]);
264         *px3 = this->getPixelAt(bufferLoc[3]);
265     }
266 
get4Pixels(const void * src,int index,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)267     void get4Pixels(
268         const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
269         *px0 = this->getPixelFromRow(src, index + 0);
270         *px1 = this->getPixelFromRow(src, index + 1);
271         *px2 = this->getPixelFromRow(src, index + 2);
272         *px3 = this->getPixelFromRow(src, index + 3);
273     }
274 
getPixelFromRow(const void * row,int index)275     Sk4f getPixelFromRow(const void* row, int index) const override {
276         const Element* src = static_cast<const Element*>(row);
277         return fConverter.toSk4f(src[index]);
278     }
279 
getPixelAt(int index)280     Sk4f getPixelAt(int index) const override {
281         return this->getPixelFromRow(fSrc, index);
282     }
283 
row(int y)284     const void* row(int y) const override { return fSrc + y * fWidth; }
285 
286 private:
287     const Element* const                 fSrc;
288     const int                            fWidth;
289     PixelConverter<colorType, gammaType> fConverter;
290 };
291 
292 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
293 // We'll never re-use pixels, but we can at least load contiguous pixels.
294 template <typename Next, typename Strategy>
src_strategy_blend(Span span,Next * next,Strategy * strategy)295 static void src_strategy_blend(Span span, Next* next, Strategy* strategy) {
296     SkPoint start;
297     SkScalar length;
298     int count;
299     std::tie(start, length, count) = span;
300     int ix = SkScalarFloorToInt(X(start));
301     const void* row = strategy->row((int)std::floor(Y(start)));
302     if (length > 0) {
303         while (count >= 4) {
304             Sk4f px0, px1, px2, px3;
305             strategy->get4Pixels(row, ix, &px0, &px1, &px2, &px3);
306             next->blend4Pixels(px0, px1, px2, px3);
307             ix += 4;
308             count -= 4;
309         }
310 
311         while (count > 0) {
312             next->blendPixel(strategy->getPixelFromRow(row, ix));
313             ix += 1;
314             count -= 1;
315         }
316     } else {
317         while (count >= 4) {
318             Sk4f px0, px1, px2, px3;
319             strategy->get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
320             next->blend4Pixels(px0, px1, px2, px3);
321             ix -= 4;
322             count -= 4;
323         }
324 
325         while (count > 0) {
326             next->blendPixel(strategy->getPixelFromRow(row, ix));
327             ix -= 1;
328             count -= 1;
329         }
330     }
331 }
332 
333 // -- NearestNeighborSampler -----------------------------------------------------------------------
334 // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels.
335 template<typename Accessor, typename Next>
336 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
337 public:
338     template<typename... Args>
NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,Args &&...args)339     NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
340     : fNext{next}, fAccessor{std::forward<Args>(args)...} { }
341 
NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,const NearestNeighborSampler & sampler)342     NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
343     const NearestNeighborSampler& sampler)
344     : fNext{next}, fAccessor{sampler.fAccessor} { }
345 
pointListFew(int n,Sk4s xs,Sk4s ys)346     void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
347         SkASSERT(0 < n && n < 4);
348         Sk4f px0, px1, px2;
349         fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2);
350         if (n >= 1) fNext->blendPixel(px0);
351         if (n >= 2) fNext->blendPixel(px1);
352         if (n >= 3) fNext->blendPixel(px2);
353     }
354 
pointList4(Sk4s xs,Sk4s ys)355     void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
356         Sk4f px0, px1, px2, px3;
357         fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3);
358         fNext->blend4Pixels(px0, px1, px2, px3);
359     }
360 
pointSpan(Span span)361     void pointSpan(Span span) override {
362         SkASSERT(!span.isEmpty());
363         SkPoint start;
364         SkScalar length;
365         int count;
366         std::tie(start, length, count) = span;
367         SkScalar absLength = SkScalarAbs(length);
368         if (absLength < (count - 1)) {
369             this->spanSlowRate(span);
370         } else if (absLength == (count - 1)) {
371             src_strategy_blend(span, fNext, &fAccessor);
372         } else {
373             this->spanFastRate(span);
374         }
375     }
376 
repeatSpan(Span span,int32_t repeatCount)377     void repeatSpan(Span span, int32_t repeatCount) override {
378         while (repeatCount > 0) {
379             this->pointSpan(span);
380             repeatCount--;
381         }
382     }
383 
384 private:
385     // When moving through source space more slowly than dst space (zoomed in),
386     // we'll be sampling from the same source pixel more than once.
spanSlowRate(Span span)387     void spanSlowRate(Span span) {
388         SkPoint start; SkScalar length; int count;
389         std::tie(start, length, count) = span;
390         SkScalar x = X(start);
391         // fx is a fixed 48.16 number.
392         int64_t fx = static_cast<int64_t>(x * SK_Fixed1);
393         SkScalar dx = length / (count - 1);
394         // fdx is a fixed 48.16 number.
395         int64_t fdx = static_cast<int64_t>(dx * SK_Fixed1);
396 
397         const void* row = fAccessor.row((int)std::floor(Y(start)));
398         Next* next = fNext;
399 
400         int64_t ix = fx >> 16;
401         int64_t prevIX = ix;
402         Sk4f fpixel = fAccessor.getPixelFromRow(row, ix);
403 
404         // When dx is less than one, each pixel is used more than once. Using the fixed point fx
405         // allows the code to quickly check that the same pixel is being used. The code uses this
406         // same pixel check to do the sRGB and normalization only once.
407         auto getNextPixel = [&]() {
408             if (ix != prevIX) {
409                 fpixel = fAccessor.getPixelFromRow(row, ix);
410                 prevIX = ix;
411             }
412             fx += fdx;
413             ix = fx >> 16;
414             return fpixel;
415         };
416 
417         while (count >= 4) {
418             Sk4f px0 = getNextPixel();
419             Sk4f px1 = getNextPixel();
420             Sk4f px2 = getNextPixel();
421             Sk4f px3 = getNextPixel();
422             next->blend4Pixels(px0, px1, px2, px3);
423             count -= 4;
424         }
425         while (count > 0) {
426             next->blendPixel(getNextPixel());
427             count -= 1;
428         }
429     }
430 
431     // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
432     // We'll never re-use pixels, but we can at least load contiguous pixels.
spanUnitRate(Span span)433     void spanUnitRate(Span span) {
434         src_strategy_blend(span, fNext, &fAccessor);
435     }
436 
437     // We're moving through source space faster than dst (zoomed out),
438     // so we'll never reuse a source pixel or be able to do contiguous loads.
spanFastRate(Span span)439     void spanFastRate(Span span) {
440         span_fallback(span, this);
441     }
442 
443     Next* const fNext;
444     Accessor    fAccessor;
445 };
446 
447 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge
448 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to
449 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value
450 // on the interval [0, vMax].
451 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel.
adjust_edge(SkShader::TileMode edgeType,int vs,int vMax)452 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) {
453     SkASSERT(-1 <= vs && vs <= vMax + 1);
454     switch (edgeType) {
455         case SkShader::kClamp_TileMode:
456         case SkShader::kMirror_TileMode:
457             vs = std::max(vs, 0);
458             vs = std::min(vs, vMax);
459             break;
460         case SkShader::kRepeat_TileMode:
461             vs = (vs <= vMax) ? vs : 0;
462             vs =    (vs >= 0) ? vs : vMax;
463             break;
464     }
465     SkASSERT(0 <= vs && vs <= vMax);
466     return vs;
467 }
468 
469 // From a sample point on the tile, return the top or left filter value.
470 // The result r should be in the range (0, 1]. Since this represents the weight given to the top
471 // left element, then if x == 0.5 the filter value should be 1.0.
472 // The input sample point must be on the tile, therefore it must be >= 0.
sample_to_filter(SkScalar x)473 static SkScalar sample_to_filter(SkScalar x) {
474     SkASSERT(x >= 0.0f);
475     // The usual form of the top or left edge is x - .5, but since we are working on the unit
476     // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use
477     // of trunc.
478     SkScalar v = x + 0.5f;
479     // Produce the top or left offset a value on the range [0, 1).
480     SkScalar f = v - SkScalarTruncToScalar(v);
481     // Produce the filter value which is on the range (0, 1].
482     SkScalar r =  1.0f - f;
483     SkASSERT(0.0f < r && r <= 1.0f);
484     return r;
485 }
486 
487 // -- BilerpSampler --------------------------------------------------------------------------------
488 // BilerpSampler - use a bilerp filter to create runs of destination pixels.
489 // Note: in the code below, there are two types of points
490 //       * sample points - these are the points passed in by pointList* and Spans.
491 //       * filter points - are created from a sample point to form the coordinates of the points
492 //                         to use in the filter and to generate the filter values.
493 template<typename Accessor, typename Next>
494 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
495 public:
496     template<typename... Args>
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,SkISize dimensions,SkShader::TileMode xTile,SkShader::TileMode yTile,Args &&...args)497     BilerpSampler(
498         SkLinearBitmapPipeline::BlendProcessorInterface* next,
499         SkISize dimensions,
500         SkShader::TileMode xTile, SkShader::TileMode yTile,
501         Args&& ... args
502     )
503         : fNext{next}
504         , fXEdgeType{xTile}
505         , fXMax{dimensions.width() - 1}
506         , fYEdgeType{yTile}
507         , fYMax{dimensions.height() - 1}
508         , fAccessor{std::forward<Args>(args)...} { }
509 
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,const BilerpSampler & sampler)510     BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
511                    const BilerpSampler& sampler)
512         : fNext{next}
513         , fXEdgeType{sampler.fXEdgeType}
514         , fXMax{sampler.fXMax}
515         , fYEdgeType{sampler.fYEdgeType}
516         , fYMax{sampler.fYMax}
517         , fAccessor{sampler.fAccessor} { }
518 
pointListFew(int n,Sk4s xs,Sk4s ys)519     void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
520         SkASSERT(0 < n && n < 4);
521         auto bilerpPixel = [&](int index) {
522             return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
523         };
524 
525         if (n >= 1) fNext->blendPixel(bilerpPixel(0));
526         if (n >= 2) fNext->blendPixel(bilerpPixel(1));
527         if (n >= 3) fNext->blendPixel(bilerpPixel(2));
528     }
529 
pointList4(Sk4s xs,Sk4s ys)530     void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
531         auto bilerpPixel = [&](int index) {
532             return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
533         };
534         fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
535     }
536 
pointSpan(Span span)537     void pointSpan(Span span) override {
538         SkASSERT(!span.isEmpty());
539         SkPoint start;
540         SkScalar length;
541         int count;
542         std::tie(start, length, count) = span;
543 
544         // Nothing to do.
545         if (count == 0) {
546             return;
547         }
548 
549         // Trivial case. No sample points are generated other than start.
550         if (count == 1) {
551             fNext->blendPixel(this->bilerpSamplePoint(start));
552             return;
553         }
554 
555         // Note: the following code could be done in terms of dx = length / (count -1), but that
556         // would introduce a divide that is not needed for the most common dx == 1 cases.
557         SkScalar absLength = SkScalarAbs(length);
558         if (absLength == 0.0f) {
559             // |dx| == 0
560             // length is zero, so clamp an edge pixel.
561             this->spanZeroRate(span);
562         } else if (absLength < (count - 1)) {
563             // 0 < |dx| < 1.
564             this->spanSlowRate(span);
565         } else if (absLength == (count - 1)) {
566             // |dx| == 1.
567             if (sample_to_filter(span.startX()) == 1.0f
568                 && sample_to_filter(span.startY()) == 1.0f) {
569                 // All the pixels are aligned with the dest; go fast.
570                 src_strategy_blend(span, fNext, &fAccessor);
571             } else {
572                 // There is some sub-pixel offsets, so bilerp.
573                 this->spanUnitRate(span);
574             }
575         } else if (absLength < 2.0f * (count - 1)) {
576             // 1 < |dx| < 2.
577             this->spanMediumRate(span);
578         } else {
579             // |dx| >= 2.
580             this->spanFastRate(span);
581         }
582     }
583 
repeatSpan(Span span,int32_t repeatCount)584     void repeatSpan(Span span, int32_t repeatCount) override {
585         while (repeatCount > 0) {
586             this->pointSpan(span);
587             repeatCount--;
588         }
589     }
590 
591 private:
592 
593     // Convert a sample point to the points used by the filter.
filterPoints(SkPoint sample,Sk4i * filterXs,Sk4i * filterYs)594     void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) {
595         // May be less than zero. Be careful to use Floor.
596         int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax);
597         // Always greater than zero. Use the faster Trunc.
598         int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax);
599         int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax);
600         int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax);
601 
602         *filterXs = Sk4i{x0, x1, x0, x1};
603         *filterYs = Sk4i{y0, y0, y1, y1};
604     }
605 
606     // Given a sample point, generate a color by bilerping the four filter points.
bilerpSamplePoint(SkPoint sample)607     Sk4f bilerpSamplePoint(SkPoint sample) {
608         Sk4i iXs, iYs;
609         filterPoints(sample, &iXs, &iYs);
610         Sk4f px00, px10, px01, px11;
611         fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11);
612         return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11);
613     }
614 
615     // Get two pixels at x from row0 and row1.
get2PixelColumn(const void * row0,const void * row1,int x,Sk4f * px0,Sk4f * px1)616     void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) {
617         *px0 = fAccessor.getPixelFromRow(row0, x);
618         *px1 = fAccessor.getPixelFromRow(row1, x);
619     }
620 
621     // |dx| == 0. This code assumes that length is zero.
spanZeroRate(Span span)622     void spanZeroRate(Span span) {
623         SkPoint start; SkScalar length; int count;
624         std::tie(start, length, count) = span;
625         SkASSERT(length == 0.0f);
626 
627         // Filter for the blending of the top and bottom pixels.
628         SkScalar filterY = sample_to_filter(Y(start));
629 
630         // Generate the four filter points from the sample point start. Generate the row* values.
631         Sk4i iXs, iYs;
632         this->filterPoints(start, &iXs, &iYs);
633         const void* const row0 = fAccessor.row(iYs[0]);
634         const void* const row1 = fAccessor.row(iYs[2]);
635 
636         // Get the two pixels that make up the clamping pixel.
637         Sk4f pxTop, pxBottom;
638         this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom);
639         Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom;
640 
641         while (count >= 4) {
642             fNext->blend4Pixels(pixel, pixel, pixel, pixel);
643             count -= 4;
644         }
645         while (count > 0) {
646             fNext->blendPixel(pixel);
647             count -= 1;
648         }
649     }
650 
651     // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce
652     // computation. In particular, several destination pixels maybe generated from the same four
653     // source pixels.
654     // In the following code a "part" is a combination of two pixels from the same column of the
655     // filter.
spanSlowRate(Span span)656     void spanSlowRate(Span span) {
657         SkPoint start; SkScalar length; int count;
658         std::tie(start, length, count) = span;
659 
660         // Calculate the distance between each sample point.
661         const SkScalar dx = length / (count - 1);
662         SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f);
663 
664         // Generate the filter values for the top-left corner.
665         // Note: these values are in filter space; this has implications about how to adjust
666         // these values at each step. For example, as the sample point increases, the filter
667         // value decreases, this is because the filter and position are related by
668         // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
669         // direction of the sample point which is increasing by dx.
670         SkScalar filterX = sample_to_filter(X(start));
671         SkScalar filterY = sample_to_filter(Y(start));
672 
673         // Generate the four filter points from the sample point start. Generate the row* values.
674         Sk4i iXs, iYs;
675         this->filterPoints(start, &iXs, &iYs);
676         const void* const row0 = fAccessor.row(iYs[0]);
677         const void* const row1 = fAccessor.row(iYs[2]);
678 
679         // Generate part of the filter value at xColumn.
680         auto partAtColumn = [&](int xColumn) {
681             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
682             Sk4f pxTop, pxBottom;
683             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
684             return pxTop * filterY + (1.0f - filterY) * pxBottom;
685         };
686 
687         // The leftPart is made up of two pixels from the left column of the filter, right part
688         // is similar. The top and bottom pixels in the *Part are created as a linear blend of
689         // the top and bottom pixels using filterY. See the partAtColumn function above.
690         Sk4f leftPart  = partAtColumn(iXs[0]);
691         Sk4f rightPart = partAtColumn(iXs[1]);
692 
693         // Create a destination color by blending together a left and right part using filterX.
694         auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
695             Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
696             return check_pixel(pixel);
697         };
698 
699         // Send the first pixel to the destination. This simplifies the loop structure so that no
700         // extra pixels are fetched for the last iteration of the loop.
701         fNext->blendPixel(bilerp(leftPart, rightPart));
702         count -= 1;
703 
704         if (dx > 0.0f) {
705             // * positive direction - generate destination pixels by sliding the filter from left
706             //                        to right.
707             int rightPartCursor = iXs[1];
708 
709             // Advance the filter from left to right. Remember that moving the top-left corner of
710             // the filter to the right actually makes the filter value smaller.
711             auto advanceFilter = [&]() {
712                 filterX -= dx;
713                 if (filterX <= 0.0f) {
714                     filterX += 1.0f;
715                     leftPart = rightPart;
716                     rightPartCursor += 1;
717                     rightPart = partAtColumn(rightPartCursor);
718                 }
719                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
720 
721                 return bilerp(leftPart, rightPart);
722             };
723 
724             while (count >= 4) {
725                 Sk4f px0 = advanceFilter(),
726                      px1 = advanceFilter(),
727                      px2 = advanceFilter(),
728                      px3 = advanceFilter();
729                 fNext->blend4Pixels(px0, px1, px2, px3);
730                 count -= 4;
731             }
732 
733             while (count > 0) {
734                 fNext->blendPixel(advanceFilter());
735                 count -= 1;
736             }
737         } else {
738             // * negative direction - generate destination pixels by sliding the filter from
739             //                        right to left.
740             int leftPartCursor = iXs[0];
741 
742             // Advance the filter from right to left. Remember that moving the top-left corner of
743             // the filter to the left actually makes the filter value larger.
744             auto advanceFilter = [&]() {
745                 // Remember, dx < 0 therefore this adds |dx| to filterX.
746                 filterX -= dx;
747                 // At this point filterX may be > 1, and needs to be wrapped back on to the filter
748                 // interval, and the next column in the filter is calculated.
749                 if (filterX > 1.0f) {
750                     filterX -= 1.0f;
751                     rightPart = leftPart;
752                     leftPartCursor -= 1;
753                     leftPart = partAtColumn(leftPartCursor);
754                 }
755                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
756 
757                 return bilerp(leftPart, rightPart);
758             };
759 
760             while (count >= 4) {
761                 Sk4f px0 = advanceFilter(),
762                      px1 = advanceFilter(),
763                      px2 = advanceFilter(),
764                      px3 = advanceFilter();
765                 fNext->blend4Pixels(px0, px1, px2, px3);
766                 count -= 4;
767             }
768 
769             while (count > 0) {
770                 fNext->blendPixel(advanceFilter());
771                 count -= 1;
772             }
773         }
774     }
775 
776     // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel.
777     // Every filter part is used for two destination pixels, and the code can bulk load four
778     // pixels at a time.
spanUnitRate(Span span)779     void spanUnitRate(Span span) {
780         SkPoint start; SkScalar length; int count;
781         std::tie(start, length, count) = span;
782         SkASSERT(SkScalarAbs(length) == (count - 1));
783 
784         // Calculate the four filter points of start, and use the two different Y values to
785         // generate the row pointers.
786         Sk4i iXs, iYs;
787         filterPoints(start, &iXs, &iYs);
788         const void* row0 = fAccessor.row(iYs[0]);
789         const void* row1 = fAccessor.row(iYs[2]);
790 
791         // Calculate the filter values for the top-left filter element.
792         const SkScalar filterX = sample_to_filter(X(start));
793         const SkScalar filterY = sample_to_filter(Y(start));
794 
795         // Generate part of the filter value at xColumn.
796         auto partAtColumn = [&](int xColumn) {
797             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
798             Sk4f pxTop, pxBottom;
799             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
800             return pxTop * filterY + (1.0f - filterY) * pxBottom;
801         };
802 
803         auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) {
804             // Check if the pixels needed are near the edges. If not go fast using bulk pixels,
805             // otherwise be careful.
806             if (0 <= ix && ix <= fXMax - 3) {
807                 Sk4f px00, px10, px20, px30,
808                      px01, px11, px21, px31;
809                 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
810                 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
811                 *part0 = filterY * px00 + (1.0f - filterY) * px01;
812                 *part1 = filterY * px10 + (1.0f - filterY) * px11;
813                 *part2 = filterY * px20 + (1.0f - filterY) * px21;
814                 *part3 = filterY * px30 + (1.0f - filterY) * px31;
815             } else {
816                 *part0 = partAtColumn(ix + 0);
817                 *part1 = partAtColumn(ix + 1);
818                 *part2 = partAtColumn(ix + 2);
819                 *part3 = partAtColumn(ix + 3);
820             }
821         };
822 
823         auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) {
824             return part0 * filterX + part1 * (1.0f - filterX);
825         };
826 
827         if (length > 0) {
828             // * positive direction - generate destination pixels by sliding the filter from left
829             //                        to right.
830 
831             // overlapPart is the filter part from the end of the previous four pixels used at
832             // the start of the next four pixels.
833             Sk4f overlapPart = partAtColumn(iXs[0]);
834             int rightColumnCursor = iXs[1];
835             while (count >= 4) {
836                 Sk4f part0, part1, part2, part3;
837                 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
838                 Sk4f px0 = bilerp(overlapPart, part0);
839                 Sk4f px1 = bilerp(part0, part1);
840                 Sk4f px2 = bilerp(part1, part2);
841                 Sk4f px3 = bilerp(part2, part3);
842                 overlapPart = part3;
843                 fNext->blend4Pixels(px0, px1, px2, px3);
844                 rightColumnCursor += 4;
845                 count -= 4;
846             }
847 
848             while (count > 0) {
849                 Sk4f rightPart = partAtColumn(rightColumnCursor);
850 
851                 fNext->blendPixel(bilerp(overlapPart, rightPart));
852                 overlapPart = rightPart;
853                 rightColumnCursor += 1;
854                 count -= 1;
855             }
856         } else {
857             // * negative direction - generate destination pixels by sliding the filter from
858             //                        right to left.
859             Sk4f overlapPart = partAtColumn(iXs[1]);
860             int leftColumnCursor = iXs[0];
861 
862             while (count >= 4) {
863                 Sk4f part0, part1, part2, part3;
864                 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
865                 Sk4f px0 = bilerp(part0, overlapPart);
866                 Sk4f px1 = bilerp(part1, part0);
867                 Sk4f px2 = bilerp(part2, part1);
868                 Sk4f px3 = bilerp(part3, part2);
869                 overlapPart = part3;
870                 fNext->blend4Pixels(px0, px1, px2, px3);
871                 leftColumnCursor -= 4;
872                 count -= 4;
873             }
874 
875             while (count > 0) {
876                 Sk4f leftPart = partAtColumn(leftColumnCursor);
877 
878                 fNext->blendPixel(bilerp(leftPart, overlapPart));
879                 overlapPart = leftPart;
880                 leftColumnCursor -= 1;
881                 count -= 1;
882             }
883         }
884     }
885 
886     // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but
887     // still slow enough to take advantage of previous calculations.
spanMediumRate(Span span)888     void spanMediumRate(Span span) {
889         SkPoint start; SkScalar length; int count;
890         std::tie(start, length, count) = span;
891 
892         // Calculate the distance between each sample point.
893         const SkScalar dx = length / (count - 1);
894         SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
895 
896         // Generate the filter values for the top-left corner.
897         // Note: these values are in filter space; this has implications about how to adjust
898         // these values at each step. For example, as the sample point increases, the filter
899         // value decreases, this is because the filter and position are related by
900         // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
901         // direction of the sample point which is increasing by dx.
902         SkScalar filterX = sample_to_filter(X(start));
903         SkScalar filterY = sample_to_filter(Y(start));
904 
905         // Generate the four filter points from the sample point start. Generate the row* values.
906         Sk4i iXs, iYs;
907         this->filterPoints(start, &iXs, &iYs);
908         const void* const row0 = fAccessor.row(iYs[0]);
909         const void* const row1 = fAccessor.row(iYs[2]);
910 
911         // Generate part of the filter value at xColumn.
912         auto partAtColumn = [&](int xColumn) {
913             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
914             Sk4f pxTop, pxBottom;
915             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
916             return pxTop * filterY + (1.0f - filterY) * pxBottom;
917         };
918 
919         // The leftPart is made up of two pixels from the left column of the filter, right part
920         // is similar. The top and bottom pixels in the *Part are created as a linear blend of
921         // the top and bottom pixels using filterY. See the nextPart function below.
922         Sk4f leftPart  = partAtColumn(iXs[0]);
923         Sk4f rightPart = partAtColumn(iXs[1]);
924 
925         // Create a destination color by blending together a left and right part using filterX.
926         auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
927             Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
928             return check_pixel(pixel);
929         };
930 
931         // Send the first pixel to the destination. This simplifies the loop structure so that no
932         // extra pixels are fetched for the last iteration of the loop.
933         fNext->blendPixel(bilerp(leftPart, rightPart));
934         count -= 1;
935 
936         if (dx > 0.0f) {
937             // * positive direction - generate destination pixels by sliding the filter from left
938             //                        to right.
939             int rightPartCursor = iXs[1];
940 
941             // Advance the filter from left to right. Remember that moving the top-left corner of
942             // the filter to the right actually makes the filter value smaller.
943             auto advanceFilter = [&]() {
944                 filterX -= dx;
945                 // At this point filterX is less than zero, but might actually be less than -1.
946                 if (filterX > -1.0f) {
947                     filterX += 1.0f;
948                     leftPart = rightPart;
949                     rightPartCursor += 1;
950                     rightPart = partAtColumn(rightPartCursor);
951                 } else {
952                     filterX += 2.0f;
953                     rightPartCursor += 2;
954                     leftPart = partAtColumn(rightPartCursor - 1);
955                     rightPart = partAtColumn(rightPartCursor);
956                 }
957                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
958 
959                 return bilerp(leftPart, rightPart);
960             };
961 
962             while (count >= 4) {
963                 Sk4f px0 = advanceFilter(),
964                      px1 = advanceFilter(),
965                      px2 = advanceFilter(),
966                      px3 = advanceFilter();
967                 fNext->blend4Pixels(px0, px1, px2, px3);
968                 count -= 4;
969             }
970 
971             while (count > 0) {
972                 fNext->blendPixel(advanceFilter());
973                 count -= 1;
974             }
975         } else {
976             // * negative direction - generate destination pixels by sliding the filter from
977             //                        right to left.
978             int leftPartCursor = iXs[0];
979 
980             auto advanceFilter = [&]() {
981                 // Remember, dx < 0 therefore this adds |dx| to filterX.
982                 filterX -= dx;
983                 // At this point, filterX is greater than one, but may actually be greater than two.
984                 if (filterX < 2.0f) {
985                     filterX -= 1.0f;
986                     rightPart = leftPart;
987                     leftPartCursor -= 1;
988                     leftPart = partAtColumn(leftPartCursor);
989                 } else {
990                     filterX -= 2.0f;
991                     leftPartCursor -= 2;
992                     rightPart = partAtColumn(leftPartCursor - 1);
993                     leftPart = partAtColumn(leftPartCursor);
994                 }
995                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
996                 return bilerp(leftPart, rightPart);
997             };
998 
999             while (count >= 4) {
1000                 Sk4f px0 = advanceFilter(),
1001                      px1 = advanceFilter(),
1002                      px2 = advanceFilter(),
1003                      px3 = advanceFilter();
1004                 fNext->blend4Pixels(px0, px1, px2, px3);
1005                 count -= 4;
1006             }
1007 
1008             while (count > 0) {
1009                 fNext->blendPixel(advanceFilter());
1010                 count -= 1;
1011             }
1012         }
1013     }
1014 
1015     // We're moving through source space faster than dst (zoomed out),
1016     // so we'll never reuse a source pixel or be able to do contiguous loads.
spanFastRate(Span span)1017     void spanFastRate(Span span) {
1018         SkPoint start; SkScalar length; int count;
1019         std::tie(start, length, count) = span;
1020         SkScalar x = X(start);
1021         SkScalar y = Y(start);
1022 
1023         SkScalar dx = length / (count - 1);
1024         while (count > 0) {
1025             fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
1026             x += dx;
1027             count -= 1;
1028         }
1029     }
1030 
1031     Next* const              fNext;
1032     const SkShader::TileMode fXEdgeType;
1033     const int                fXMax;
1034     const SkShader::TileMode fYEdgeType;
1035     const int                fYMax;
1036     Accessor                 fAccessor;
1037 };
1038 
1039 }  // namespace
1040 
1041 #endif  // SkLinearBitmapPipeline_sampler_DEFINED
1042