1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED
9 #define SkLinearBitmapPipeline_sampler_DEFINED
10
11 #include <tuple>
12
13 #include "SkAutoMalloc.h"
14 #include "SkColor.h"
15 #include "SkColorPriv.h"
16 #include "SkFixed.h" // for SkFixed1 only. Don't use SkFixed in this file.
17 #include "SkHalf.h"
18 #include "SkLinearBitmapPipeline_core.h"
19 #include "SkNx.h"
20 #include "SkPM4fPriv.h"
21
22 namespace {
23 // Explaination of the math:
24 // 1 - x x
25 // +--------+--------+
26 // | | |
27 // 1 - y | px00 | px10 |
28 // | | |
29 // +--------+--------+
30 // | | |
31 // y | px01 | px11 |
32 // | | |
33 // +--------+--------+
34 //
35 //
36 // Given a pixelxy each is multiplied by a different factor derived from the fractional part of x
37 // and y:
38 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
39 // * px10 -> x(1 - y) = x - xy
40 // * px01 -> (1 - x)y = y - xy
41 // * px11 -> xy
42 // So x * y is calculated first and then used to calculate all the other factors.
bilerp4(Sk4s xs,Sk4s ys,Sk4f px00,Sk4f px10,Sk4f px01,Sk4f px11)43 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
44 Sk4f px01, Sk4f px11) {
45 // Calculate fractional xs and ys.
46 Sk4s fxs = xs - xs.floor();
47 Sk4s fys = ys - ys.floor();
48 Sk4s fxys{fxs * fys};
49 Sk4f sum = px11 * fxys;
50 sum = sum + px01 * (fys - fxys);
51 sum = sum + px10 * (fxs - fxys);
52 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
53 return sum;
54 }
55
56 ////////////////////////////////////////////////////////////////////////////////////////////////////
57 // PixelGetter is the lowest level interface to the source data. There is a PixelConverter for each
58 // of the different SkColorTypes.
59 template <SkColorType, SkGammaType> class PixelConverter;
60
61 // Alpha handling:
62 // The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate
63 // the entire bitmap. So, the tint color is given an alpha of 1.0 so that the later alpha can
64 // modulate this color later.
65 template <>
66 class PixelConverter<kAlpha_8_SkColorType, kLinear_SkGammaType> {
67 public:
68 using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap,SkColor tintColor)69 PixelConverter(const SkPixmap& srcPixmap, SkColor tintColor) {
70 fTintColor = SkColor4f::FromColor(tintColor);
71 fTintColor.fA = 1.0f;
72 }
73
toSk4f(const Element pixel)74 Sk4f toSk4f(const Element pixel) const {
75 return Sk4f::Load(&fTintColor) * (pixel * (1.0f/255.0f));
76 }
77
78 private:
79 SkColor4f fTintColor;
80 };
81
82 template <SkGammaType gammaType>
pmcolor_to_rgba(SkPMColor pixel)83 static inline Sk4f pmcolor_to_rgba(SkPMColor pixel) {
84 return swizzle_rb_if_bgra(
85 (gammaType == kSRGB_SkGammaType) ? Sk4f_fromS32(pixel)
86 : Sk4f_fromL32(pixel));
87 }
88
89 template <SkGammaType gammaType>
90 class PixelConverter<kRGB_565_SkColorType, gammaType> {
91 public:
92 using Element = uint16_t;
PixelConverter(const SkPixmap & srcPixmap)93 PixelConverter(const SkPixmap& srcPixmap) { }
94
toSk4f(Element pixel)95 Sk4f toSk4f(Element pixel) const {
96 return pmcolor_to_rgba<gammaType>(SkPixel16ToPixel32(pixel));
97 }
98 };
99
100 template <SkGammaType gammaType>
101 class PixelConverter<kARGB_4444_SkColorType, gammaType> {
102 public:
103 using Element = uint16_t;
PixelConverter(const SkPixmap & srcPixmap)104 PixelConverter(const SkPixmap& srcPixmap) { }
105
toSk4f(Element pixel)106 Sk4f toSk4f(Element pixel) const {
107 return pmcolor_to_rgba<gammaType>(SkPixel4444ToPixel32(pixel));
108 }
109 };
110
111 template <SkGammaType gammaType>
112 class PixelConverter<kRGBA_8888_SkColorType, gammaType> {
113 public:
114 using Element = uint32_t;
PixelConverter(const SkPixmap & srcPixmap)115 PixelConverter(const SkPixmap& srcPixmap) { }
116
toSk4f(Element pixel)117 Sk4f toSk4f(Element pixel) const {
118 return gammaType == kSRGB_SkGammaType
119 ? Sk4f_fromS32(pixel)
120 : Sk4f_fromL32(pixel);
121 }
122 };
123
124 template <SkGammaType gammaType>
125 class PixelConverter<kBGRA_8888_SkColorType, gammaType> {
126 public:
127 using Element = uint32_t;
PixelConverter(const SkPixmap & srcPixmap)128 PixelConverter(const SkPixmap& srcPixmap) { }
129
toSk4f(Element pixel)130 Sk4f toSk4f(Element pixel) const {
131 return swizzle_rb(
132 gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_fromL32(pixel));
133 }
134 };
135
136 template <SkGammaType gammaType>
137 class PixelConverter<kIndex_8_SkColorType, gammaType> {
138 public:
139 using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap)140 PixelConverter(const SkPixmap& srcPixmap)
141 : fColorTableSize(srcPixmap.ctable()->count()){
142 SkColorTable* skColorTable = srcPixmap.ctable();
143 SkASSERT(skColorTable != nullptr);
144
145 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
146 for (int i = 0; i < fColorTableSize; i++) {
147 fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]);
148 }
149 }
150
PixelConverter(const PixelConverter & strategy)151 PixelConverter(const PixelConverter& strategy)
152 : fColorTableSize{strategy.fColorTableSize}{
153 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
154 for (int i = 0; i < fColorTableSize; i++) {
155 fColorTable[i] = strategy.fColorTable[i];
156 }
157 }
158
toSk4f(Element index)159 Sk4f toSk4f(Element index) const {
160 return fColorTable[index];
161 }
162
163 private:
164 static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12;
165 const int fColorTableSize;
166 SkAutoMalloc fColorTableStorage{kColorTableSize};
167 Sk4f* fColorTable;
168 };
169
170 template <SkGammaType gammaType>
171 class PixelConverter<kGray_8_SkColorType, gammaType> {
172 public:
173 using Element = uint8_t;
PixelConverter(const SkPixmap & srcPixmap)174 PixelConverter(const SkPixmap& srcPixmap) { }
175
toSk4f(Element pixel)176 Sk4f toSk4f(Element pixel) const {
177 float gray = (gammaType == kSRGB_SkGammaType)
178 ? sk_linear_from_srgb[pixel]
179 : pixel * (1/255.0f);
180 return {gray, gray, gray, 1.0f};
181 }
182 };
183
184 template <>
185 class PixelConverter<kRGBA_F16_SkColorType, kLinear_SkGammaType> {
186 public:
187 using Element = uint64_t;
PixelConverter(const SkPixmap & srcPixmap)188 PixelConverter(const SkPixmap& srcPixmap) { }
189
toSk4f(const Element pixel)190 Sk4f toSk4f(const Element pixel) const {
191 return SkHalfToFloat_finite_ftz(pixel);
192 }
193 };
194
195 class PixelAccessorShim {
196 public:
PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface * accessor)197 explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* accessor)
198 : fPixelAccessor(accessor) { }
199
getFewPixels(int n,Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2)200 void SK_VECTORCALL getFewPixels(
201 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
202 fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2);
203 }
204
get4Pixels(Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)205 void SK_VECTORCALL get4Pixels(
206 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
207 fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3);
208 }
209
get4Pixels(const void * src,int index,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)210 void get4Pixels(
211 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
212 fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3);
213 }
214
getPixelFromRow(const void * row,int index)215 Sk4f getPixelFromRow(const void* row, int index) const {
216 return fPixelAccessor->getPixelFromRow(row, index);
217 }
218
getPixelAt(int index)219 Sk4f getPixelAt(int index) const {
220 return fPixelAccessor->getPixelAt(index);
221 }
222
row(int y)223 const void* row(int y) const {
224 return fPixelAccessor->row(y);
225 }
226
227 private:
228 SkLinearBitmapPipeline::PixelAccessorInterface* const fPixelAccessor;
229 };
230
231 ////////////////////////////////////////////////////////////////////////////////////////////////////
232 // PixelAccessor handles all the same plumbing for all the PixelGetters.
233 template <SkColorType colorType, SkGammaType gammaType>
234 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterface {
235 using Element = typename PixelConverter<colorType, gammaType>::Element;
236 public:
237 template <typename... Args>
PixelAccessor(const SkPixmap & srcPixmap,Args &&...args)238 PixelAccessor(const SkPixmap& srcPixmap, Args&&... args)
239 : fSrc{static_cast<const Element*>(srcPixmap.addr())}
240 , fWidth{srcPixmap.rowBytesAsPixels()}
241 , fConverter{srcPixmap, std::move<Args>(args)...} { }
242
getFewPixels(int n,Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2)243 void SK_VECTORCALL getFewPixels (
244 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
245 Sk4i bufferLoc = ys * fWidth + xs;
246 switch (n) {
247 case 3:
248 *px2 = this->getPixelAt(bufferLoc[2]);
249 case 2:
250 *px1 = this->getPixelAt(bufferLoc[1]);
251 case 1:
252 *px0 = this->getPixelAt(bufferLoc[0]);
253 default:
254 break;
255 }
256 }
257
get4Pixels(Sk4i xs,Sk4i ys,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)258 void SK_VECTORCALL get4Pixels(
259 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
260 Sk4i bufferLoc = ys * fWidth + xs;
261 *px0 = this->getPixelAt(bufferLoc[0]);
262 *px1 = this->getPixelAt(bufferLoc[1]);
263 *px2 = this->getPixelAt(bufferLoc[2]);
264 *px3 = this->getPixelAt(bufferLoc[3]);
265 }
266
get4Pixels(const void * src,int index,Sk4f * px0,Sk4f * px1,Sk4f * px2,Sk4f * px3)267 void get4Pixels(
268 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
269 *px0 = this->getPixelFromRow(src, index + 0);
270 *px1 = this->getPixelFromRow(src, index + 1);
271 *px2 = this->getPixelFromRow(src, index + 2);
272 *px3 = this->getPixelFromRow(src, index + 3);
273 }
274
getPixelFromRow(const void * row,int index)275 Sk4f getPixelFromRow(const void* row, int index) const override {
276 const Element* src = static_cast<const Element*>(row);
277 return fConverter.toSk4f(src[index]);
278 }
279
getPixelAt(int index)280 Sk4f getPixelAt(int index) const override {
281 return this->getPixelFromRow(fSrc, index);
282 }
283
row(int y)284 const void* row(int y) const override { return fSrc + y * fWidth; }
285
286 private:
287 const Element* const fSrc;
288 const int fWidth;
289 PixelConverter<colorType, gammaType> fConverter;
290 };
291
292 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
293 // We'll never re-use pixels, but we can at least load contiguous pixels.
294 template <typename Next, typename Strategy>
src_strategy_blend(Span span,Next * next,Strategy * strategy)295 static void src_strategy_blend(Span span, Next* next, Strategy* strategy) {
296 SkPoint start;
297 SkScalar length;
298 int count;
299 std::tie(start, length, count) = span;
300 int ix = SkScalarFloorToInt(X(start));
301 const void* row = strategy->row((int)std::floor(Y(start)));
302 if (length > 0) {
303 while (count >= 4) {
304 Sk4f px0, px1, px2, px3;
305 strategy->get4Pixels(row, ix, &px0, &px1, &px2, &px3);
306 next->blend4Pixels(px0, px1, px2, px3);
307 ix += 4;
308 count -= 4;
309 }
310
311 while (count > 0) {
312 next->blendPixel(strategy->getPixelFromRow(row, ix));
313 ix += 1;
314 count -= 1;
315 }
316 } else {
317 while (count >= 4) {
318 Sk4f px0, px1, px2, px3;
319 strategy->get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
320 next->blend4Pixels(px0, px1, px2, px3);
321 ix -= 4;
322 count -= 4;
323 }
324
325 while (count > 0) {
326 next->blendPixel(strategy->getPixelFromRow(row, ix));
327 ix -= 1;
328 count -= 1;
329 }
330 }
331 }
332
333 // -- NearestNeighborSampler -----------------------------------------------------------------------
334 // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels.
335 template<typename Accessor, typename Next>
336 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
337 public:
338 template<typename... Args>
NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,Args &&...args)339 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
340 : fNext{next}, fAccessor{std::forward<Args>(args)...} { }
341
NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,const NearestNeighborSampler & sampler)342 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
343 const NearestNeighborSampler& sampler)
344 : fNext{next}, fAccessor{sampler.fAccessor} { }
345
pointListFew(int n,Sk4s xs,Sk4s ys)346 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
347 SkASSERT(0 < n && n < 4);
348 Sk4f px0, px1, px2;
349 fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2);
350 if (n >= 1) fNext->blendPixel(px0);
351 if (n >= 2) fNext->blendPixel(px1);
352 if (n >= 3) fNext->blendPixel(px2);
353 }
354
pointList4(Sk4s xs,Sk4s ys)355 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
356 Sk4f px0, px1, px2, px3;
357 fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3);
358 fNext->blend4Pixels(px0, px1, px2, px3);
359 }
360
pointSpan(Span span)361 void pointSpan(Span span) override {
362 SkASSERT(!span.isEmpty());
363 SkPoint start;
364 SkScalar length;
365 int count;
366 std::tie(start, length, count) = span;
367 SkScalar absLength = SkScalarAbs(length);
368 if (absLength < (count - 1)) {
369 this->spanSlowRate(span);
370 } else if (absLength == (count - 1)) {
371 src_strategy_blend(span, fNext, &fAccessor);
372 } else {
373 this->spanFastRate(span);
374 }
375 }
376
repeatSpan(Span span,int32_t repeatCount)377 void repeatSpan(Span span, int32_t repeatCount) override {
378 while (repeatCount > 0) {
379 this->pointSpan(span);
380 repeatCount--;
381 }
382 }
383
384 private:
385 // When moving through source space more slowly than dst space (zoomed in),
386 // we'll be sampling from the same source pixel more than once.
spanSlowRate(Span span)387 void spanSlowRate(Span span) {
388 SkPoint start; SkScalar length; int count;
389 std::tie(start, length, count) = span;
390 SkScalar x = X(start);
391 // fx is a fixed 48.16 number.
392 int64_t fx = static_cast<int64_t>(x * SK_Fixed1);
393 SkScalar dx = length / (count - 1);
394 // fdx is a fixed 48.16 number.
395 int64_t fdx = static_cast<int64_t>(dx * SK_Fixed1);
396
397 const void* row = fAccessor.row((int)std::floor(Y(start)));
398 Next* next = fNext;
399
400 int64_t ix = fx >> 16;
401 int64_t prevIX = ix;
402 Sk4f fpixel = fAccessor.getPixelFromRow(row, ix);
403
404 // When dx is less than one, each pixel is used more than once. Using the fixed point fx
405 // allows the code to quickly check that the same pixel is being used. The code uses this
406 // same pixel check to do the sRGB and normalization only once.
407 auto getNextPixel = [&]() {
408 if (ix != prevIX) {
409 fpixel = fAccessor.getPixelFromRow(row, ix);
410 prevIX = ix;
411 }
412 fx += fdx;
413 ix = fx >> 16;
414 return fpixel;
415 };
416
417 while (count >= 4) {
418 Sk4f px0 = getNextPixel();
419 Sk4f px1 = getNextPixel();
420 Sk4f px2 = getNextPixel();
421 Sk4f px3 = getNextPixel();
422 next->blend4Pixels(px0, px1, px2, px3);
423 count -= 4;
424 }
425 while (count > 0) {
426 next->blendPixel(getNextPixel());
427 count -= 1;
428 }
429 }
430
431 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
432 // We'll never re-use pixels, but we can at least load contiguous pixels.
spanUnitRate(Span span)433 void spanUnitRate(Span span) {
434 src_strategy_blend(span, fNext, &fAccessor);
435 }
436
437 // We're moving through source space faster than dst (zoomed out),
438 // so we'll never reuse a source pixel or be able to do contiguous loads.
spanFastRate(Span span)439 void spanFastRate(Span span) {
440 span_fallback(span, this);
441 }
442
443 Next* const fNext;
444 Accessor fAccessor;
445 };
446
447 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge
448 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to
449 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value
450 // on the interval [0, vMax].
451 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel.
adjust_edge(SkShader::TileMode edgeType,int vs,int vMax)452 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) {
453 SkASSERT(-1 <= vs && vs <= vMax + 1);
454 switch (edgeType) {
455 case SkShader::kClamp_TileMode:
456 case SkShader::kMirror_TileMode:
457 vs = std::max(vs, 0);
458 vs = std::min(vs, vMax);
459 break;
460 case SkShader::kRepeat_TileMode:
461 vs = (vs <= vMax) ? vs : 0;
462 vs = (vs >= 0) ? vs : vMax;
463 break;
464 }
465 SkASSERT(0 <= vs && vs <= vMax);
466 return vs;
467 }
468
469 // From a sample point on the tile, return the top or left filter value.
470 // The result r should be in the range (0, 1]. Since this represents the weight given to the top
471 // left element, then if x == 0.5 the filter value should be 1.0.
472 // The input sample point must be on the tile, therefore it must be >= 0.
sample_to_filter(SkScalar x)473 static SkScalar sample_to_filter(SkScalar x) {
474 SkASSERT(x >= 0.0f);
475 // The usual form of the top or left edge is x - .5, but since we are working on the unit
476 // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use
477 // of trunc.
478 SkScalar v = x + 0.5f;
479 // Produce the top or left offset a value on the range [0, 1).
480 SkScalar f = v - SkScalarTruncToScalar(v);
481 // Produce the filter value which is on the range (0, 1].
482 SkScalar r = 1.0f - f;
483 SkASSERT(0.0f < r && r <= 1.0f);
484 return r;
485 }
486
487 // -- BilerpSampler --------------------------------------------------------------------------------
488 // BilerpSampler - use a bilerp filter to create runs of destination pixels.
489 // Note: in the code below, there are two types of points
490 // * sample points - these are the points passed in by pointList* and Spans.
491 // * filter points - are created from a sample point to form the coordinates of the points
492 // to use in the filter and to generate the filter values.
493 template<typename Accessor, typename Next>
494 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
495 public:
496 template<typename... Args>
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,SkISize dimensions,SkShader::TileMode xTile,SkShader::TileMode yTile,Args &&...args)497 BilerpSampler(
498 SkLinearBitmapPipeline::BlendProcessorInterface* next,
499 SkISize dimensions,
500 SkShader::TileMode xTile, SkShader::TileMode yTile,
501 Args&& ... args
502 )
503 : fNext{next}
504 , fXEdgeType{xTile}
505 , fXMax{dimensions.width() - 1}
506 , fYEdgeType{yTile}
507 , fYMax{dimensions.height() - 1}
508 , fAccessor{std::forward<Args>(args)...} { }
509
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface * next,const BilerpSampler & sampler)510 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
511 const BilerpSampler& sampler)
512 : fNext{next}
513 , fXEdgeType{sampler.fXEdgeType}
514 , fXMax{sampler.fXMax}
515 , fYEdgeType{sampler.fYEdgeType}
516 , fYMax{sampler.fYMax}
517 , fAccessor{sampler.fAccessor} { }
518
pointListFew(int n,Sk4s xs,Sk4s ys)519 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
520 SkASSERT(0 < n && n < 4);
521 auto bilerpPixel = [&](int index) {
522 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
523 };
524
525 if (n >= 1) fNext->blendPixel(bilerpPixel(0));
526 if (n >= 2) fNext->blendPixel(bilerpPixel(1));
527 if (n >= 3) fNext->blendPixel(bilerpPixel(2));
528 }
529
pointList4(Sk4s xs,Sk4s ys)530 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
531 auto bilerpPixel = [&](int index) {
532 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
533 };
534 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
535 }
536
pointSpan(Span span)537 void pointSpan(Span span) override {
538 SkASSERT(!span.isEmpty());
539 SkPoint start;
540 SkScalar length;
541 int count;
542 std::tie(start, length, count) = span;
543
544 // Nothing to do.
545 if (count == 0) {
546 return;
547 }
548
549 // Trivial case. No sample points are generated other than start.
550 if (count == 1) {
551 fNext->blendPixel(this->bilerpSamplePoint(start));
552 return;
553 }
554
555 // Note: the following code could be done in terms of dx = length / (count -1), but that
556 // would introduce a divide that is not needed for the most common dx == 1 cases.
557 SkScalar absLength = SkScalarAbs(length);
558 if (absLength == 0.0f) {
559 // |dx| == 0
560 // length is zero, so clamp an edge pixel.
561 this->spanZeroRate(span);
562 } else if (absLength < (count - 1)) {
563 // 0 < |dx| < 1.
564 this->spanSlowRate(span);
565 } else if (absLength == (count - 1)) {
566 // |dx| == 1.
567 if (sample_to_filter(span.startX()) == 1.0f
568 && sample_to_filter(span.startY()) == 1.0f) {
569 // All the pixels are aligned with the dest; go fast.
570 src_strategy_blend(span, fNext, &fAccessor);
571 } else {
572 // There is some sub-pixel offsets, so bilerp.
573 this->spanUnitRate(span);
574 }
575 } else if (absLength < 2.0f * (count - 1)) {
576 // 1 < |dx| < 2.
577 this->spanMediumRate(span);
578 } else {
579 // |dx| >= 2.
580 this->spanFastRate(span);
581 }
582 }
583
repeatSpan(Span span,int32_t repeatCount)584 void repeatSpan(Span span, int32_t repeatCount) override {
585 while (repeatCount > 0) {
586 this->pointSpan(span);
587 repeatCount--;
588 }
589 }
590
591 private:
592
593 // Convert a sample point to the points used by the filter.
filterPoints(SkPoint sample,Sk4i * filterXs,Sk4i * filterYs)594 void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) {
595 // May be less than zero. Be careful to use Floor.
596 int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax);
597 // Always greater than zero. Use the faster Trunc.
598 int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax);
599 int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax);
600 int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax);
601
602 *filterXs = Sk4i{x0, x1, x0, x1};
603 *filterYs = Sk4i{y0, y0, y1, y1};
604 }
605
606 // Given a sample point, generate a color by bilerping the four filter points.
bilerpSamplePoint(SkPoint sample)607 Sk4f bilerpSamplePoint(SkPoint sample) {
608 Sk4i iXs, iYs;
609 filterPoints(sample, &iXs, &iYs);
610 Sk4f px00, px10, px01, px11;
611 fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11);
612 return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11);
613 }
614
615 // Get two pixels at x from row0 and row1.
get2PixelColumn(const void * row0,const void * row1,int x,Sk4f * px0,Sk4f * px1)616 void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) {
617 *px0 = fAccessor.getPixelFromRow(row0, x);
618 *px1 = fAccessor.getPixelFromRow(row1, x);
619 }
620
621 // |dx| == 0. This code assumes that length is zero.
spanZeroRate(Span span)622 void spanZeroRate(Span span) {
623 SkPoint start; SkScalar length; int count;
624 std::tie(start, length, count) = span;
625 SkASSERT(length == 0.0f);
626
627 // Filter for the blending of the top and bottom pixels.
628 SkScalar filterY = sample_to_filter(Y(start));
629
630 // Generate the four filter points from the sample point start. Generate the row* values.
631 Sk4i iXs, iYs;
632 this->filterPoints(start, &iXs, &iYs);
633 const void* const row0 = fAccessor.row(iYs[0]);
634 const void* const row1 = fAccessor.row(iYs[2]);
635
636 // Get the two pixels that make up the clamping pixel.
637 Sk4f pxTop, pxBottom;
638 this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom);
639 Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom;
640
641 while (count >= 4) {
642 fNext->blend4Pixels(pixel, pixel, pixel, pixel);
643 count -= 4;
644 }
645 while (count > 0) {
646 fNext->blendPixel(pixel);
647 count -= 1;
648 }
649 }
650
651 // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce
652 // computation. In particular, several destination pixels maybe generated from the same four
653 // source pixels.
654 // In the following code a "part" is a combination of two pixels from the same column of the
655 // filter.
spanSlowRate(Span span)656 void spanSlowRate(Span span) {
657 SkPoint start; SkScalar length; int count;
658 std::tie(start, length, count) = span;
659
660 // Calculate the distance between each sample point.
661 const SkScalar dx = length / (count - 1);
662 SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f);
663
664 // Generate the filter values for the top-left corner.
665 // Note: these values are in filter space; this has implications about how to adjust
666 // these values at each step. For example, as the sample point increases, the filter
667 // value decreases, this is because the filter and position are related by
668 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
669 // direction of the sample point which is increasing by dx.
670 SkScalar filterX = sample_to_filter(X(start));
671 SkScalar filterY = sample_to_filter(Y(start));
672
673 // Generate the four filter points from the sample point start. Generate the row* values.
674 Sk4i iXs, iYs;
675 this->filterPoints(start, &iXs, &iYs);
676 const void* const row0 = fAccessor.row(iYs[0]);
677 const void* const row1 = fAccessor.row(iYs[2]);
678
679 // Generate part of the filter value at xColumn.
680 auto partAtColumn = [&](int xColumn) {
681 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
682 Sk4f pxTop, pxBottom;
683 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
684 return pxTop * filterY + (1.0f - filterY) * pxBottom;
685 };
686
687 // The leftPart is made up of two pixels from the left column of the filter, right part
688 // is similar. The top and bottom pixels in the *Part are created as a linear blend of
689 // the top and bottom pixels using filterY. See the partAtColumn function above.
690 Sk4f leftPart = partAtColumn(iXs[0]);
691 Sk4f rightPart = partAtColumn(iXs[1]);
692
693 // Create a destination color by blending together a left and right part using filterX.
694 auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
695 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
696 return check_pixel(pixel);
697 };
698
699 // Send the first pixel to the destination. This simplifies the loop structure so that no
700 // extra pixels are fetched for the last iteration of the loop.
701 fNext->blendPixel(bilerp(leftPart, rightPart));
702 count -= 1;
703
704 if (dx > 0.0f) {
705 // * positive direction - generate destination pixels by sliding the filter from left
706 // to right.
707 int rightPartCursor = iXs[1];
708
709 // Advance the filter from left to right. Remember that moving the top-left corner of
710 // the filter to the right actually makes the filter value smaller.
711 auto advanceFilter = [&]() {
712 filterX -= dx;
713 if (filterX <= 0.0f) {
714 filterX += 1.0f;
715 leftPart = rightPart;
716 rightPartCursor += 1;
717 rightPart = partAtColumn(rightPartCursor);
718 }
719 SkASSERT(0.0f < filterX && filterX <= 1.0f);
720
721 return bilerp(leftPart, rightPart);
722 };
723
724 while (count >= 4) {
725 Sk4f px0 = advanceFilter(),
726 px1 = advanceFilter(),
727 px2 = advanceFilter(),
728 px3 = advanceFilter();
729 fNext->blend4Pixels(px0, px1, px2, px3);
730 count -= 4;
731 }
732
733 while (count > 0) {
734 fNext->blendPixel(advanceFilter());
735 count -= 1;
736 }
737 } else {
738 // * negative direction - generate destination pixels by sliding the filter from
739 // right to left.
740 int leftPartCursor = iXs[0];
741
742 // Advance the filter from right to left. Remember that moving the top-left corner of
743 // the filter to the left actually makes the filter value larger.
744 auto advanceFilter = [&]() {
745 // Remember, dx < 0 therefore this adds |dx| to filterX.
746 filterX -= dx;
747 // At this point filterX may be > 1, and needs to be wrapped back on to the filter
748 // interval, and the next column in the filter is calculated.
749 if (filterX > 1.0f) {
750 filterX -= 1.0f;
751 rightPart = leftPart;
752 leftPartCursor -= 1;
753 leftPart = partAtColumn(leftPartCursor);
754 }
755 SkASSERT(0.0f < filterX && filterX <= 1.0f);
756
757 return bilerp(leftPart, rightPart);
758 };
759
760 while (count >= 4) {
761 Sk4f px0 = advanceFilter(),
762 px1 = advanceFilter(),
763 px2 = advanceFilter(),
764 px3 = advanceFilter();
765 fNext->blend4Pixels(px0, px1, px2, px3);
766 count -= 4;
767 }
768
769 while (count > 0) {
770 fNext->blendPixel(advanceFilter());
771 count -= 1;
772 }
773 }
774 }
775
776 // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel.
777 // Every filter part is used for two destination pixels, and the code can bulk load four
778 // pixels at a time.
spanUnitRate(Span span)779 void spanUnitRate(Span span) {
780 SkPoint start; SkScalar length; int count;
781 std::tie(start, length, count) = span;
782 SkASSERT(SkScalarAbs(length) == (count - 1));
783
784 // Calculate the four filter points of start, and use the two different Y values to
785 // generate the row pointers.
786 Sk4i iXs, iYs;
787 filterPoints(start, &iXs, &iYs);
788 const void* row0 = fAccessor.row(iYs[0]);
789 const void* row1 = fAccessor.row(iYs[2]);
790
791 // Calculate the filter values for the top-left filter element.
792 const SkScalar filterX = sample_to_filter(X(start));
793 const SkScalar filterY = sample_to_filter(Y(start));
794
795 // Generate part of the filter value at xColumn.
796 auto partAtColumn = [&](int xColumn) {
797 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
798 Sk4f pxTop, pxBottom;
799 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
800 return pxTop * filterY + (1.0f - filterY) * pxBottom;
801 };
802
803 auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) {
804 // Check if the pixels needed are near the edges. If not go fast using bulk pixels,
805 // otherwise be careful.
806 if (0 <= ix && ix <= fXMax - 3) {
807 Sk4f px00, px10, px20, px30,
808 px01, px11, px21, px31;
809 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
810 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
811 *part0 = filterY * px00 + (1.0f - filterY) * px01;
812 *part1 = filterY * px10 + (1.0f - filterY) * px11;
813 *part2 = filterY * px20 + (1.0f - filterY) * px21;
814 *part3 = filterY * px30 + (1.0f - filterY) * px31;
815 } else {
816 *part0 = partAtColumn(ix + 0);
817 *part1 = partAtColumn(ix + 1);
818 *part2 = partAtColumn(ix + 2);
819 *part3 = partAtColumn(ix + 3);
820 }
821 };
822
823 auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) {
824 return part0 * filterX + part1 * (1.0f - filterX);
825 };
826
827 if (length > 0) {
828 // * positive direction - generate destination pixels by sliding the filter from left
829 // to right.
830
831 // overlapPart is the filter part from the end of the previous four pixels used at
832 // the start of the next four pixels.
833 Sk4f overlapPart = partAtColumn(iXs[0]);
834 int rightColumnCursor = iXs[1];
835 while (count >= 4) {
836 Sk4f part0, part1, part2, part3;
837 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
838 Sk4f px0 = bilerp(overlapPart, part0);
839 Sk4f px1 = bilerp(part0, part1);
840 Sk4f px2 = bilerp(part1, part2);
841 Sk4f px3 = bilerp(part2, part3);
842 overlapPart = part3;
843 fNext->blend4Pixels(px0, px1, px2, px3);
844 rightColumnCursor += 4;
845 count -= 4;
846 }
847
848 while (count > 0) {
849 Sk4f rightPart = partAtColumn(rightColumnCursor);
850
851 fNext->blendPixel(bilerp(overlapPart, rightPart));
852 overlapPart = rightPart;
853 rightColumnCursor += 1;
854 count -= 1;
855 }
856 } else {
857 // * negative direction - generate destination pixels by sliding the filter from
858 // right to left.
859 Sk4f overlapPart = partAtColumn(iXs[1]);
860 int leftColumnCursor = iXs[0];
861
862 while (count >= 4) {
863 Sk4f part0, part1, part2, part3;
864 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
865 Sk4f px0 = bilerp(part0, overlapPart);
866 Sk4f px1 = bilerp(part1, part0);
867 Sk4f px2 = bilerp(part2, part1);
868 Sk4f px3 = bilerp(part3, part2);
869 overlapPart = part3;
870 fNext->blend4Pixels(px0, px1, px2, px3);
871 leftColumnCursor -= 4;
872 count -= 4;
873 }
874
875 while (count > 0) {
876 Sk4f leftPart = partAtColumn(leftColumnCursor);
877
878 fNext->blendPixel(bilerp(leftPart, overlapPart));
879 overlapPart = leftPart;
880 leftColumnCursor -= 1;
881 count -= 1;
882 }
883 }
884 }
885
886 // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but
887 // still slow enough to take advantage of previous calculations.
spanMediumRate(Span span)888 void spanMediumRate(Span span) {
889 SkPoint start; SkScalar length; int count;
890 std::tie(start, length, count) = span;
891
892 // Calculate the distance between each sample point.
893 const SkScalar dx = length / (count - 1);
894 SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
895
896 // Generate the filter values for the top-left corner.
897 // Note: these values are in filter space; this has implications about how to adjust
898 // these values at each step. For example, as the sample point increases, the filter
899 // value decreases, this is because the filter and position are related by
900 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
901 // direction of the sample point which is increasing by dx.
902 SkScalar filterX = sample_to_filter(X(start));
903 SkScalar filterY = sample_to_filter(Y(start));
904
905 // Generate the four filter points from the sample point start. Generate the row* values.
906 Sk4i iXs, iYs;
907 this->filterPoints(start, &iXs, &iYs);
908 const void* const row0 = fAccessor.row(iYs[0]);
909 const void* const row1 = fAccessor.row(iYs[2]);
910
911 // Generate part of the filter value at xColumn.
912 auto partAtColumn = [&](int xColumn) {
913 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
914 Sk4f pxTop, pxBottom;
915 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
916 return pxTop * filterY + (1.0f - filterY) * pxBottom;
917 };
918
919 // The leftPart is made up of two pixels from the left column of the filter, right part
920 // is similar. The top and bottom pixels in the *Part are created as a linear blend of
921 // the top and bottom pixels using filterY. See the nextPart function below.
922 Sk4f leftPart = partAtColumn(iXs[0]);
923 Sk4f rightPart = partAtColumn(iXs[1]);
924
925 // Create a destination color by blending together a left and right part using filterX.
926 auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
927 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
928 return check_pixel(pixel);
929 };
930
931 // Send the first pixel to the destination. This simplifies the loop structure so that no
932 // extra pixels are fetched for the last iteration of the loop.
933 fNext->blendPixel(bilerp(leftPart, rightPart));
934 count -= 1;
935
936 if (dx > 0.0f) {
937 // * positive direction - generate destination pixels by sliding the filter from left
938 // to right.
939 int rightPartCursor = iXs[1];
940
941 // Advance the filter from left to right. Remember that moving the top-left corner of
942 // the filter to the right actually makes the filter value smaller.
943 auto advanceFilter = [&]() {
944 filterX -= dx;
945 // At this point filterX is less than zero, but might actually be less than -1.
946 if (filterX > -1.0f) {
947 filterX += 1.0f;
948 leftPart = rightPart;
949 rightPartCursor += 1;
950 rightPart = partAtColumn(rightPartCursor);
951 } else {
952 filterX += 2.0f;
953 rightPartCursor += 2;
954 leftPart = partAtColumn(rightPartCursor - 1);
955 rightPart = partAtColumn(rightPartCursor);
956 }
957 SkASSERT(0.0f < filterX && filterX <= 1.0f);
958
959 return bilerp(leftPart, rightPart);
960 };
961
962 while (count >= 4) {
963 Sk4f px0 = advanceFilter(),
964 px1 = advanceFilter(),
965 px2 = advanceFilter(),
966 px3 = advanceFilter();
967 fNext->blend4Pixels(px0, px1, px2, px3);
968 count -= 4;
969 }
970
971 while (count > 0) {
972 fNext->blendPixel(advanceFilter());
973 count -= 1;
974 }
975 } else {
976 // * negative direction - generate destination pixels by sliding the filter from
977 // right to left.
978 int leftPartCursor = iXs[0];
979
980 auto advanceFilter = [&]() {
981 // Remember, dx < 0 therefore this adds |dx| to filterX.
982 filterX -= dx;
983 // At this point, filterX is greater than one, but may actually be greater than two.
984 if (filterX < 2.0f) {
985 filterX -= 1.0f;
986 rightPart = leftPart;
987 leftPartCursor -= 1;
988 leftPart = partAtColumn(leftPartCursor);
989 } else {
990 filterX -= 2.0f;
991 leftPartCursor -= 2;
992 rightPart = partAtColumn(leftPartCursor - 1);
993 leftPart = partAtColumn(leftPartCursor);
994 }
995 SkASSERT(0.0f < filterX && filterX <= 1.0f);
996 return bilerp(leftPart, rightPart);
997 };
998
999 while (count >= 4) {
1000 Sk4f px0 = advanceFilter(),
1001 px1 = advanceFilter(),
1002 px2 = advanceFilter(),
1003 px3 = advanceFilter();
1004 fNext->blend4Pixels(px0, px1, px2, px3);
1005 count -= 4;
1006 }
1007
1008 while (count > 0) {
1009 fNext->blendPixel(advanceFilter());
1010 count -= 1;
1011 }
1012 }
1013 }
1014
1015 // We're moving through source space faster than dst (zoomed out),
1016 // so we'll never reuse a source pixel or be able to do contiguous loads.
spanFastRate(Span span)1017 void spanFastRate(Span span) {
1018 SkPoint start; SkScalar length; int count;
1019 std::tie(start, length, count) = span;
1020 SkScalar x = X(start);
1021 SkScalar y = Y(start);
1022
1023 SkScalar dx = length / (count - 1);
1024 while (count > 0) {
1025 fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
1026 x += dx;
1027 count -= 1;
1028 }
1029 }
1030
    // Next stage of the pipeline; the span methods hand it finished colors through
    // blendPixel() and blend4Pixels().
    Next* const fNext;
    // Tiling mode and upper column bound for x. Both are passed to adjust_edge() before a
    // column is fetched, and fXMax also gates the bulk four pixel loads (ix <= fXMax - 3).
    const SkShader::TileMode fXEdgeType;
    const int fXMax;
    // NOTE(review): presumably the y counterparts of the two members above; they are not
    // referenced by the span methods in this part of the file - confirm against their users.
    const SkShader::TileMode fYEdgeType;
    const int fYMax;
    // Access to the source pixels: row() yields a row pointer and get4Pixels() loads four
    // neighboring pixels from a row.
    Accessor fAccessor;
1037 };
1038
1039 } // namespace
1040
1041 #endif // SkLinearBitmapPipeline_sampler_DEFINED
1042