1 // Copyright 2014 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // WebPPicture utils for colorspace conversion
11 //
12 // Author: Skal (pascal.massimino@gmail.com)
13 
14 #include <assert.h>
15 #include <stdlib.h>
16 #include <math.h>
17 
18 #include "src/enc/vp8i_enc.h"
19 #include "src/utils/random_utils.h"
20 #include "src/utils/utils.h"
21 #include "src/dsp/dsp.h"
22 #include "src/dsp/lossless.h"
23 #include "src/dsp/yuv.h"
24 
// Comment out the following define to disable gamma-compression during
// RGB->U/V averaging.
26 #define USE_GAMMA_COMPRESSION
27 
28 // If defined, use table to compute x / alpha.
29 #define USE_INVERSE_ALPHA_TABLE
30 
31 #ifdef WORDS_BIGENDIAN
32 #define ALPHA_OFFSET 0   // uint32_t 0xff000000 is 0xff,00,00,00 in memory
33 #else
34 #define ALPHA_OFFSET 3   // uint32_t 0xff000000 is 0x00,00,00,ff in memory
35 #endif
36 
37 //------------------------------------------------------------------------------
38 // Detection of non-trivial transparency
39 
// Returns true if alpha[] has non-0xff values.
// 'height' rows are visited, starting at 'alpha' and advancing by 'y_step'
// bytes after each one. A row is tested with WebPHasAlpha8b() when
// x_step == 1 and with WebPHasAlpha32b() for any other step value.
// A NULL 'alpha' pointer is reported as opaque (returns 0).
static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
                          int x_step, int y_step) {
  const int is_packed = (x_step == 1);
  int row;
  if (alpha == NULL) return 0;
  WebPInitAlphaProcessing();
  for (row = 0; row < height; ++row, alpha += y_step) {
    const int has_alpha = is_packed ? WebPHasAlpha8b(alpha, width)
                                    : WebPHasAlpha32b(alpha, width);
    if (has_alpha) return 1;   // stop at the first non-opaque row
  }
  return 0;
}
56 
57 // Checking for the presence of non-opaque alpha.
WebPPictureHasTransparency(const WebPPicture * picture)58 int WebPPictureHasTransparency(const WebPPicture* picture) {
59   if (picture == NULL) return 0;
60   if (!picture->use_argb) {
61     return CheckNonOpaque(picture->a, picture->width, picture->height,
62                           1, picture->a_stride);
63   } else {
64     const int alpha_offset = ALPHA_OFFSET;
65     return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset,
66                           picture->width, picture->height,
67                           4, picture->argb_stride * sizeof(*picture->argb));
68   }
69   return 0;
70 }
71 
72 //------------------------------------------------------------------------------
73 // Code for gamma correction
74 
75 #if defined(USE_GAMMA_COMPRESSION)
76 
77 // gamma-compensates loss of resolution during chroma subsampling
78 #define kGamma 0.80      // for now we use a different gamma value than kGammaF
79 #define kGammaFix 12     // fixed-point precision for linear values
80 #define kGammaScale ((1 << kGammaFix) - 1)
81 #define kGammaTabFix 7   // fixed-point fractional bits precision
82 #define kGammaTabScale (1 << kGammaTabFix)
83 #define kGammaTabRounder (kGammaTabScale >> 1)
84 #define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
85 
86 static int kLinearToGammaTab[kGammaTabSize + 1];
87 static uint16_t kGammaToLinearTab[256];
88 static volatile int kGammaTablesOk = 0;
89 
InitGammaTables(void)90 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {
91   if (!kGammaTablesOk) {
92     int v;
93     const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
94     const double norm = 1. / 255.;
95     for (v = 0; v <= 255; ++v) {
96       kGammaToLinearTab[v] =
97           (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
98     }
99     for (v = 0; v <= kGammaTabSize; ++v) {
100       kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
101     }
102     kGammaTablesOk = 1;
103   }
104 }
105 
GammaToLinear(uint8_t v)106 static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
107   return kGammaToLinearTab[v];
108 }
109 
Interpolate(int v)110 static WEBP_INLINE int Interpolate(int v) {
111   const int tab_pos = v >> (kGammaTabFix + 2);    // integer part
112   const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
113   const int v0 = kLinearToGammaTab[tab_pos];
114   const int v1 = kLinearToGammaTab[tab_pos + 1];
115   const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
116   assert(tab_pos + 1 < kGammaTabSize + 1);
117   return y;
118 }
119 
120 // Convert a linear value 'v' to YUV_FIX+2 fixed-point precision
121 // U/V value, suitable for RGBToU/V calls.
LinearToGamma(uint32_t base_value,int shift)122 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
123   const int y = Interpolate(base_value << shift);   // final uplifted value
124   return (y + kGammaTabRounder) >> kGammaTabFix;    // descale
125 }
126 
127 #else
128 
// Gamma compression is compiled out: there is no table to initialize.
static void InitGammaTables(void) {
}
GammaToLinear(uint8_t v)130 static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
LinearToGamma(uint32_t base_value,int shift)131 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
132   return (int)(base_value << shift);
133 }
134 
135 #endif    // USE_GAMMA_COMPRESSION
136 
137 //------------------------------------------------------------------------------
138 // RGB -> YUV conversion
139 
// Forwards r/g/b to VP8RGBToY(). The fourth argument is YUV_HALF when 'rg' is
// NULL; otherwise it is drawn from VP8RandomBits(rg, YUV_FIX).
static int RGBToY(int r, int g, int b, VP8Random* const rg) {
  if (rg != NULL) {
    return VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));
  }
  return VP8RGBToY(r, g, b, YUV_HALF);
}
144 
// Forwards r/g/b to VP8RGBToU(). The fourth argument is (YUV_HALF << 2) when
// 'rg' is NULL; otherwise it is drawn from VP8RandomBits(rg, YUV_FIX + 2).
static int RGBToU(int r, int g, int b, VP8Random* const rg) {
  if (rg != NULL) {
    return VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
  }
  return VP8RGBToU(r, g, b, YUV_HALF << 2);
}
149 
// Forwards r/g/b to VP8RGBToV(). The fourth argument is (YUV_HALF << 2) when
// 'rg' is NULL; otherwise it is drawn from VP8RandomBits(rg, YUV_FIX + 2).
static int RGBToV(int r, int g, int b, VP8Random* const rg) {
  if (rg != NULL) {
    return VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
  }
  return VP8RGBToV(r, g, b, YUV_HALF << 2);
}
154 
155 //------------------------------------------------------------------------------
156 // Sharp RGB->YUV conversion
157 
158 static const int kNumIterations = 4;
159 static const int kMinDimensionIterativeConversion = 4;
160 
161 // We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
162 // banding sometimes. Better use extra precision.
163 #define SFIX 2                // fixed-point precision of RGB and Y/W
164 typedef int16_t fixed_t;      // signed type with extra SFIX precision for UV
165 typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W
166 
167 #define SHALF (1 << SFIX >> 1)
168 #define MAX_Y_T ((256 << SFIX) - 1)
169 #define SROUNDER (1 << (YUV_FIX + SFIX - 1))
170 
171 #if defined(USE_GAMMA_COMPRESSION)
172 
173 // We use tables of different size and precision for the Rec709 / BT2020
174 // transfer function.
175 #define kGammaF (1./0.45)
176 static uint32_t kLinearToGammaTabS[kGammaTabSize + 2];
177 #define GAMMA_TO_LINEAR_BITS 14
178 static uint32_t kGammaToLinearTabS[MAX_Y_T + 1];   // size scales with Y_FIX
179 static volatile int kGammaTablesSOk = 0;
180 
InitGammaTablesS(void)181 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesS(void) {
182   assert(2 * GAMMA_TO_LINEAR_BITS < 32);  // we use uint32_t intermediate values
183   if (!kGammaTablesSOk) {
184     int v;
185     const double norm = 1. / MAX_Y_T;
186     const double scale = 1. / kGammaTabSize;
187     const double a = 0.09929682680944;
188     const double thresh = 0.018053968510807;
189     const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
190     for (v = 0; v <= MAX_Y_T; ++v) {
191       const double g = norm * v;
192       double value;
193       if (g <= thresh * 4.5) {
194         value = g / 4.5;
195       } else {
196         const double a_rec = 1. / (1. + a);
197         value = pow(a_rec * (g + a), kGammaF);
198       }
199       kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
200     }
201     for (v = 0; v <= kGammaTabSize; ++v) {
202       const double g = scale * v;
203       double value;
204       if (g <= thresh) {
205         value = 4.5 * g;
206       } else {
207         value = (1. + a) * pow(g, 1. / kGammaF) - a;
208       }
209       // we already incorporate the 1/2 rounding constant here
210       kLinearToGammaTabS[v] =
211           (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1);
212     }
213     // to prevent small rounding errors to cause read-overflow:
214     kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize];
215     kGammaTablesSOk = 1;
216   }
217 }
218 
219 // return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS
GammaToLinearS(int v)220 static WEBP_INLINE uint32_t GammaToLinearS(int v) {
221   return kGammaToLinearTabS[v];
222 }
223 
LinearToGammaS(uint32_t value)224 static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
225   // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision
226   const uint32_t v = value * kGammaTabSize;
227   const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS;
228   // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision
229   const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS);  // fractional part
230   // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1])
231   const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0];
232   const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1];
233   // Final interpolation. Note that rounding is already included.
234   const uint32_t v2 = (v1 - v0) * x;    // note: v1 >= v0.
235   const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS);
236   return result;
237 }
238 
239 #else
240 
InitGammaTablesS(void)241 static void InitGammaTablesS(void) {}
GammaToLinearS(int v)242 static WEBP_INLINE uint32_t GammaToLinearS(int v) {
243   return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T;
244 }
LinearToGammaS(uint32_t value)245 static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
246   return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS;
247 }
248 
249 #endif    // USE_GAMMA_COMPRESSION
250 
251 //------------------------------------------------------------------------------
252 
// Clamps 'v' to the [0, 255] range.
static uint8_t clip_8b(fixed_t v) {
  if (v < 0) return 0u;
  if (v > 0xff) return 255u;
  return (uint8_t)v;
}
256 
clip_y(int y)257 static fixed_y_t clip_y(int y) {
258   return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
259 }
260 
261 //------------------------------------------------------------------------------
262 
RGBToGray(int r,int g,int b)263 static int RGBToGray(int r, int g, int b) {
264   const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF;
265   return (luma >> YUV_FIX);
266 }
267 
// Averages four samples after mapping them through GammaToLinearS(), then
// maps the rounded average back with LinearToGammaS().
static uint32_t ScaleDown(int a, int b, int c, int d) {
  const uint32_t sum = GammaToLinearS(a) + GammaToLinearS(b) +
                       GammaToLinearS(c) + GammaToLinearS(d);
  const uint32_t average = (sum + 2) >> 2;   // rounded division by 4
  return LinearToGammaS(average);
}
275 
UpdateW(const fixed_y_t * src,fixed_y_t * dst,int w)276 static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) {
277   int i;
278   for (i = 0; i < w; ++i) {
279     const uint32_t R = GammaToLinearS(src[0 * w + i]);
280     const uint32_t G = GammaToLinearS(src[1 * w + i]);
281     const uint32_t B = GammaToLinearS(src[2 * w + i]);
282     const uint32_t Y = RGBToGray(R, G, B);
283     dst[i] = (fixed_y_t)LinearToGammaS(Y);
284   }
285 }
286 
// Produces 'uv_w' output triplets from two input rows. For each output
// position, the 2x2 neighborhood (two samples of 'src1', two of 'src2') of
// each R/G/B plane is reduced with ScaleDown(), and the difference between
// each reduced channel and their RGBToGray() value is stored.
// Input planes are 2 * uv_w samples apart; output planes are uv_w apart.
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
                         fixed_t* dst, int uv_w) {
  const int plane = 2 * uv_w;   // stride between R, G and B input planes
  int i;
  for (i = 0; i < uv_w; ++i) {
    const int x = 2 * i;        // leftmost input column of the 2x2 block
    const int r = ScaleDown(src1[0 * plane + x], src1[0 * plane + x + 1],
                            src2[0 * plane + x], src2[0 * plane + x + 1]);
    const int g = ScaleDown(src1[1 * plane + x], src1[1 * plane + x + 1],
                            src2[1 * plane + x], src2[1 * plane + x + 1]);
    const int b = ScaleDown(src1[2 * plane + x], src1[2 * plane + x + 1],
                            src2[2 * plane + x], src2[2 * plane + x + 1]);
    const int W = RGBToGray(r, g, b);
    dst[i + 0 * uv_w] = (fixed_t)(r - W);
    dst[i + 1 * uv_w] = (fixed_t)(g - W);
    dst[i + 2 * uv_w] = (fixed_t)(b - W);
  }
}
306 
// Writes RGBToGray() of each R/G/B sample triplet into y[]. 'rgb' holds three
// consecutive planes of 'w' samples each.
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
  const fixed_y_t* const r_plane = rgb + 0 * w;
  const fixed_y_t* const g_plane = rgb + 1 * w;
  const fixed_y_t* const b_plane = rgb + 2 * w;
  int x;
  for (x = 0; x < w; ++x) {
    y[x] = RGBToGray(r_plane[x], g_plane[x], b_plane[x]);
  }
}
313 
314 //------------------------------------------------------------------------------
315 
Filter2(int A,int B,int W0)316 static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) {
317   const int v0 = (A * 3 + B + 2) >> 2;
318   return clip_y(v0 + W0);
319 }
320 
321 //------------------------------------------------------------------------------
322 
UpLift(uint8_t a)323 static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {  // 8bit -> SFIX
324   return ((fixed_y_t)a << SFIX) | SHALF;
325 }
326 
// Converts one row of 'pic_width' R/G/B samples (read every 'step' bytes) to
// UpLift()'ed values, stored in 'dst' as three consecutive planes. Each plane
// is 'pic_width' rounded up to an even number of samples; for an odd width
// the extra sample replicates the rightmost pixel.
static void ImportOneRow(const uint8_t* const r_ptr,
                         const uint8_t* const g_ptr,
                         const uint8_t* const b_ptr,
                         int step,
                         int pic_width,
                         fixed_y_t* const dst) {
  const int w = (pic_width + 1) & ~1;   // even plane width
  fixed_y_t* const r_dst = dst + 0 * w;
  fixed_y_t* const g_dst = dst + 1 * w;
  fixed_y_t* const b_dst = dst + 2 * w;
  int x;
  for (x = 0; x < pic_width; ++x) {
    const int off = x * step;
    r_dst[x] = UpLift(r_ptr[off]);
    g_dst[x] = UpLift(g_ptr[off]);
    b_dst[x] = UpLift(b_ptr[off]);
  }
  if (pic_width & 1) {  // replicate rightmost pixel
    r_dst[pic_width] = r_dst[pic_width - 1];
    g_dst[pic_width] = g_dst[pic_width - 1];
    b_dst[pic_width] = b_dst[pic_width - 1];
  }
}
347 
// Reconstructs two rows of R/G/B samples (three planes of 'w' samples each, in
// 'out1' and 'out2') from the two rows at 'best_y' and three rows of
// half-width chroma-like data. 'out1' blends 'cur_uv' with 'prev_uv', 'out2'
// blends 'cur_uv' with 'next_uv'. The same two 'best_y' rows are used for all
// three planes.
static void InterpolateTwoRows(const fixed_y_t* const best_y,
                               const fixed_t* prev_uv,
                               const fixed_t* cur_uv,
                               const fixed_t* next_uv,
                               int w,
                               fixed_y_t* out1,
                               fixed_y_t* out2) {
  const int uv_w = w >> 1;
  const int len = (w - 1) >> 1;   // length to filter
  const int w_is_even = !(w & 1);
  int plane;
  for (plane = 0; plane < 3; ++plane) {   // R, G and B segments in turn
    const fixed_t* const prev = prev_uv + plane * uv_w;
    const fixed_t* const cur = cur_uv + plane * uv_w;
    const fixed_t* const next = next_uv + plane * uv_w;
    fixed_y_t* const dst1 = out1 + plane * w;
    fixed_y_t* const dst2 = out2 + plane * w;

    // special boundary case for the leftmost sample
    dst1[0] = Filter2(cur[0], prev[0], best_y[0]);
    dst2[0] = Filter2(cur[0], next[0], best_y[w]);

    // interior samples
    WebPSharpYUVFilterRow(cur, prev, len, best_y + 0 + 1, dst1 + 1);
    WebPSharpYUVFilterRow(cur, next, len, best_y + w + 1, dst2 + 1);

    // special boundary case for the rightmost sample when w is even
    if (w_is_even) {
      dst1[w - 1] = Filter2(cur[uv_w - 1], prev[uv_w - 1], best_y[w - 1 + 0]);
      dst2[w - 1] = Filter2(cur[uv_w - 1], next[uv_w - 1], best_y[w - 1 + w]);
    }
  }
}
380 
ConvertRGBToY(int r,int g,int b)381 static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
382   const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
383   return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
384 }
385 
ConvertRGBToU(int r,int g,int b)386 static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
387   const int u =  -9719 * r - 19081 * g + 28800 * b + SROUNDER;
388   return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
389 }
390 
ConvertRGBToV(int r,int g,int b)391 static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
392   const int v = +28800 * r - 24116 * g -  4684 * b + SROUNDER;
393   return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
394 }
395 
// Writes the final 8-bit planes of 'picture' from the intermediate buffers.
// 'best_y' holds rows of 'w' samples and 'best_uv' holds rows of three
// 'uv_w'-wide planes, where w is the picture width rounded up to even and
// uv_w = w / 2. The Y plane covers picture->width x picture->height samples;
// the U and V planes cover uv_w x uv_h samples. Always returns 1.
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
                            WebPPicture* const picture) {
  const int w = (picture->width + 1) & ~1;
  const int h = (picture->height + 1) & ~1;
  const int uv_w = w >> 1;
  const int uv_h = h >> 1;
  const fixed_t* uv_row;
  uint8_t* y_out = picture->y;
  uint8_t* u_out = picture->u;
  uint8_t* v_out = picture->v;
  int x, y;

  // Luma: every pair of rows shares one row of 'best_uv'.
  uv_row = best_uv;
  for (y = 0; y < picture->height; ++y) {
    for (x = 0; x < picture->width; ++x) {
      const int uv_x = x >> 1;
      const int W = best_y[x];
      const int r = uv_row[uv_x + 0 * uv_w] + W;
      const int g = uv_row[uv_x + 1 * uv_w] + W;
      const int b = uv_row[uv_x + 2 * uv_w] + W;
      y_out[x] = ConvertRGBToY(r, g, b);
    }
    best_y += w;
    y_out += picture->y_stride;
    if (y & 1) uv_row += 3 * uv_w;   // advance after each odd row
  }

  // Chroma: one output row per row of 'best_uv'.
  uv_row = best_uv;
  for (y = 0; y < uv_h; ++y) {
    for (x = 0; x < uv_w; ++x) {
      const int r = uv_row[x + 0 * uv_w];
      const int g = uv_row[x + 1 * uv_w];
      const int b = uv_row[x + 2 * uv_w];
      u_out[x] = ConvertRGBToU(r, g, b);
      v_out[x] = ConvertRGBToV(r, g, b);
    }
    uv_row += 3 * uv_w;
    u_out += picture->uv_stride;
    v_out += picture->uv_stride;
  }
  return 1;
}
435 
436 //------------------------------------------------------------------------------
437 // Main function
438 
439 #define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
440 
// Iterative ("sharp") conversion of R/G/B samples to the Y/U/V planes of
// 'picture'.
//   r_ptr, g_ptr, b_ptr: first sample of each channel; consecutive samples of
//                        a row are 'step' bytes apart, rows are 'rgb_stride'
//                        bytes apart.
//   picture: provides width/height and receives the output through
//            ConvertWRGBToYUV().
// Returns the result of ConvertWRGBToYUV() on success. If any of the working
// buffers cannot be allocated, returns the value of
// WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY).
// All working buffers are released before returning.
static int PreprocessARGB(const uint8_t* r_ptr,
                          const uint8_t* g_ptr,
                          const uint8_t* b_ptr,
                          int step, int rgb_stride,
                          WebPPicture* const picture) {
  // we expand the right/bottom border if needed
  const int w = (picture->width + 1) & ~1;
  const int h = (picture->height + 1) & ~1;
  const int uv_w = w >> 1;
  const int uv_h = h >> 1;
  // Starts at the largest value, so the first comparison cannot trigger.
  uint64_t prev_diff_y_sum = ~0;
  int j, iter;

  // TODO(skal): allocate one big memory chunk. But for now, it's easier
  // for valgrind debugging to have several chunks.
  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
  fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
  fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
  // Moving cursors into the full-size buffers allocated above.
  fixed_y_t* best_y = best_y_base;
  fixed_y_t* target_y = target_y_base;
  fixed_t* best_uv = best_uv_base;
  fixed_t* target_uv = target_uv_base;
  // Early-exit threshold for the iteration loop: 3 per (padded) pixel.
  const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
  int ok;

  // A single failed allocation aborts the conversion; the End: label frees
  // whatever was successfully allocated.
  if (best_y_base == NULL || best_uv_base == NULL ||
      target_y_base == NULL || target_uv_base == NULL ||
      best_rgb_y == NULL || best_rgb_uv == NULL ||
      tmp_buffer == NULL) {
    ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
    goto End;
  }
  assert(picture->width >= kMinDimensionIterativeConversion);
  assert(picture->height >= kMinDimensionIterativeConversion);

  WebPInitConvertARGBToYUV();

  // Import RGB samples to W/RGB representation.
  // Rows are processed in pairs; tmp_buffer holds the two imported rows, each
  // as three planes of 'w' samples.
  for (j = 0; j < picture->height; j += 2) {
    const int is_last_row = (j == picture->height - 1);
    fixed_y_t* const src1 = tmp_buffer + 0 * w;
    fixed_y_t* const src2 = tmp_buffer + 3 * w;

    // prepare two rows of input
    ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1);
    if (!is_last_row) {
      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
                   step, picture->width, src2);
    } else {
      // Odd height: there is no second row, so the last one is duplicated.
      memcpy(src2, src1, 3 * w * sizeof(*src2));
    }
    // Initial estimate for the 'best' buffers.
    StoreGray(src1, best_y + 0, w);
    StoreGray(src2, best_y + w, w);

    // Reference values that the iterations below compare against.
    UpdateW(src1, target_y, w);
    UpdateW(src2, target_y + w, w);
    UpdateChroma(src1, src2, target_uv, uv_w);
    memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
    best_y += 2 * w;
    best_uv += 3 * uv_w;
    target_y += 2 * w;
    target_uv += 3 * uv_w;
    r_ptr += 2 * rgb_stride;
    g_ptr += 2 * rgb_stride;
    b_ptr += 2 * rgb_stride;
  }

  // Iterate and resolve clipping conflicts.
  for (iter = 0; iter < kNumIterations; ++iter) {
    const fixed_t* cur_uv = best_uv_base;
    const fixed_t* prev_uv = best_uv_base;
    uint64_t diff_y_sum = 0;

    // Rewind the cursors to the start of each buffer.
    best_y = best_y_base;
    best_uv = best_uv_base;
    target_y = target_y_base;
    target_uv = target_uv_base;
    for (j = 0; j < h; j += 2) {
      fixed_y_t* const src1 = tmp_buffer + 0 * w;
      fixed_y_t* const src2 = tmp_buffer + 3 * w;
      {
        // On the last pair of rows there is no following chroma row:
        // next_uv then stays equal to cur_uv.
        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2);
        prev_uv = cur_uv;
        cur_uv = next_uv;
      }

      // Recompute W and chroma from the reconstructed rows.
      UpdateW(src1, best_rgb_y + 0 * w, w);
      UpdateW(src2, best_rgb_y + 1 * w, w);
      UpdateChroma(src1, src2, best_rgb_uv, uv_w);

      // update two rows of Y and one row of RGB
      diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w);
      WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);

      best_y += 2 * w;
      best_uv += 3 * uv_w;
      target_y += 2 * w;
      target_uv += 3 * uv_w;
    }
    // test exit condition
    // (never on the first pass: stop once the accumulated difference is below
    // the threshold, or has grown compared to the previous pass)
    if (iter > 0) {
      if (diff_y_sum < diff_y_threshold) break;
      if (diff_y_sum > prev_diff_y_sum) break;
    }
    prev_diff_y_sum = diff_y_sum;
  }
  // final reconstruction
  ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture);

 End:
  // Common exit path for both success and allocation failure.
  WebPSafeFree(best_y_base);
  WebPSafeFree(best_uv_base);
  WebPSafeFree(target_y_base);
  WebPSafeFree(target_uv_base);
  WebPSafeFree(best_rgb_y);
  WebPSafeFree(best_rgb_uv);
  WebPSafeFree(tmp_buffer);
  return ok;
}
565 #undef SAFE_ALLOC
566 
567 //------------------------------------------------------------------------------
568 // "Fast" regular RGB->YUV
569 
// Helper macros for averaging 2x2 or 1x2 neighborhoods. They all reference a
// variable named 'rgb_stride', which must be in scope at the expansion site.

// Sum of GammaToLinear() over the four samples at ptr[0], ptr[step],
// ptr[rgb_stride] and ptr[rgb_stride + step], passed to LinearToGamma() with
// a shift of 0.
#define SUM4(ptr, step) LinearToGamma(                     \
    GammaToLinear((ptr)[0]) +                              \
    GammaToLinear((ptr)[(step)]) +                         \
    GammaToLinear((ptr)[rgb_stride]) +                     \
    GammaToLinear((ptr)[rgb_stride + (step)]), 0)          \

// Same with only the two samples at ptr[0] and ptr[rgb_stride]; the sum is
// passed to LinearToGamma() with a shift of 1.
#define SUM2(ptr) \
    LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)

// Plain sums of two (resp. four) values. SUM4ALPHA reads its second column at
// a fixed offset of 4 from 'ptr'.
#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
581 
582 #if defined(USE_INVERSE_ALPHA_TABLE)
583 
584 static const int kAlphaFix = 19;
585 // Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
586 // formula is then equal to v / a in most (99.6%) cases. Note that this table
587 // and constant are adjusted very tightly to fit 32b arithmetic.
588 // In particular, they use the fact that the operands for 'v / a' are actually
589 // derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
590 // with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
591 // overflow is: kGammaFix + kAlphaFix <= 31.
592 static const uint32_t kInvAlpha[4 * 0xff + 1] = {
593   0,  /* alpha = 0 */
594   524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
595   58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
596   30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
597   20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
598   15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
599   12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
600   10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
601   9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
602   8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
603   7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
604   6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
605   5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
606   5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
607   4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
608   4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
609   4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
610   4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
611   3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
612   3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
613   3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
614   3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
615   3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
616   2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
617   2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
618   2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
619   2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
620   2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
621   2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
622   2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
623   2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
624   2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
625   2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
626   2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
627   1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
628   1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
629   1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
630   1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
631   1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
632   1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
633   1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
634   1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
635   1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
636   1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
637   1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
638   1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
639   1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
640   1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
641   1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
642   1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
643   1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
644   1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
645   1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
646   1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
647   1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
648   1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
649   1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
650   1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
651   1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
652   1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
653   1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
654   1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
655   1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
656   1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
657   1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
658   1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
659   1006, 1004, 1002, 1000, 998, 996, 994, 992,
660   991, 989, 987, 985, 983, 981, 979, 978,
661   976, 974, 972, 970, 969, 967, 965, 963,
662   961, 960, 958, 956, 954, 953, 951, 949,
663   948, 946, 944, 942, 941, 939, 937, 936,
664   934, 932, 931, 929, 927, 926, 924, 923,
665   921, 919, 918, 916, 914, 913, 911, 910,
666   908, 907, 905, 903, 902, 900, 899, 897,
667   896, 894, 893, 891, 890, 888, 887, 885,
668   884, 882, 881, 879, 878, 876, 875, 873,
669   872, 870, 869, 868, 866, 865, 863, 862,
670   860, 859, 858, 856, 855, 853, 852, 851,
671   849, 848, 846, 845, 844, 842, 841, 840,
672   838, 837, 836, 834, 833, 832, 830, 829,
673   828, 826, 825, 824, 823, 821, 820, 819,
674   817, 816, 815, 814, 812, 811, 810, 809,
675   807, 806, 805, 804, 802, 801, 800, 799,
676   798, 796, 795, 794, 793, 791, 790, 789,
677   788, 787, 786, 784, 783, 782, 781, 780,
678   779, 777, 776, 775, 774, 773, 772, 771,
679   769, 768, 767, 766, 765, 764, 763, 762,
680   760, 759, 758, 757, 756, 755, 754, 753,
681   752, 751, 750, 748, 747, 746, 745, 744,
682   743, 742, 741, 740, 739, 738, 737, 736,
683   735, 734, 733, 732, 731, 730, 729, 728,
684   727, 726, 725, 724, 723, 722, 721, 720,
685   719, 718, 717, 716, 715, 714, 713, 712,
686   711, 710, 709, 708, 707, 706, 705, 704,
687   703, 702, 701, 700, 699, 699, 698, 697,
688   696, 695, 694, 693, 692, 691, 690, 689,
689   688, 688, 687, 686, 685, 684, 683, 682,
690   681, 680, 680, 679, 678, 677, 676, 675,
691   674, 673, 673, 672, 671, 670, 669, 668,
692   667, 667, 666, 665, 664, 663, 662, 661,
693   661, 660, 659, 658, 657, 657, 656, 655,
694   654, 653, 652, 652, 651, 650, 649, 648,
695   648, 647, 646, 645, 644, 644, 643, 642,
696   641, 640, 640, 639, 638, 637, 637, 636,
697   635, 634, 633, 633, 632, 631, 630, 630,
698   629, 628, 627, 627, 626, 625, 624, 624,
699   623, 622, 621, 621, 620, 619, 618, 618,
700   617, 616, 616, 615, 614, 613, 613, 612,
701   611, 611, 610, 609, 608, 608, 607, 606,
702   606, 605, 604, 604, 603, 602, 601, 601,
703   600, 599, 599, 598, 597, 597, 596, 595,
704   595, 594, 593, 593, 592, 591, 591, 590,
705   589, 589, 588, 587, 587, 586, 585, 585,
706   584, 583, 583, 582, 581, 581, 580, 579,
707   579, 578, 578, 577, 576, 576, 575, 574,
708   574, 573, 572, 572, 571, 571, 570, 569,
709   569, 568, 568, 567, 566, 566, 565, 564,
710   564, 563, 563, 562, 561, 561, 560, 560,
711   559, 558, 558, 557, 557, 556, 555, 555,
712   554, 554, 553, 553, 552, 551, 551, 550,
713   550, 549, 548, 548, 547, 547, 546, 546,
714   545, 544, 544, 543, 543, 542, 542, 541,
715   541, 540, 539, 539, 538, 538, 537, 537,
716   536, 536, 535, 534, 534, 533, 533, 532,
717   532, 531, 531, 530, 530, 529, 529, 528,
718   527, 527, 526, 526, 525, 525, 524, 524,
719   523, 523, 522, 522, 521, 521, 520, 520,
720   519, 519, 518, 518, 517, 517, 516, 516,
721   515, 515, 514, 514
722 };
723 
724 // Note that LinearToGamma() expects the values to be premultiplied by 4,
725 // so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
726 #define DIVIDE_BY_ALPHA(sum, a)  (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
727 
728 #else
729 
730 #define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
731 
732 #endif  // USE_INVERSE_ALPHA_TABLE
733 
LinearToGammaWeighted(const uint8_t * src,const uint8_t * a_ptr,uint32_t total_a,int step,int rgb_stride)734 static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
735                                              const uint8_t* a_ptr,
736                                              uint32_t total_a, int step,
737                                              int rgb_stride) {
738   const uint32_t sum =
739       a_ptr[0] * GammaToLinear(src[0]) +
740       a_ptr[step] * GammaToLinear(src[step]) +
741       a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) +
742       a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]);
743   assert(total_a > 0 && total_a <= 4 * 0xff);
744 #if defined(USE_INVERSE_ALPHA_TABLE)
745   assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
746 #endif
747   return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
748 }
749 
ConvertRowToY(const uint8_t * const r_ptr,const uint8_t * const g_ptr,const uint8_t * const b_ptr,int step,uint8_t * const dst_y,int width,VP8Random * const rg)750 static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
751                                       const uint8_t* const g_ptr,
752                                       const uint8_t* const b_ptr,
753                                       int step,
754                                       uint8_t* const dst_y,
755                                       int width,
756                                       VP8Random* const rg) {
757   int i, j;
758   for (i = 0, j = 0; i < width; i += 1, j += step) {
759     dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg);
760   }
761 }
762 
AccumulateRGBA(const uint8_t * const r_ptr,const uint8_t * const g_ptr,const uint8_t * const b_ptr,const uint8_t * const a_ptr,int rgb_stride,uint16_t * dst,int width)763 static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr,
764                                        const uint8_t* const g_ptr,
765                                        const uint8_t* const b_ptr,
766                                        const uint8_t* const a_ptr,
767                                        int rgb_stride,
768                                        uint16_t* dst, int width) {
769   int i, j;
770   // we loop over 2x2 blocks and produce one R/G/B/A value for each.
771   for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * 4, dst += 4) {
772     const uint32_t a = SUM4ALPHA(a_ptr + j);
773     int r, g, b;
774     if (a == 4 * 0xff || a == 0) {
775       r = SUM4(r_ptr + j, 4);
776       g = SUM4(g_ptr + j, 4);
777       b = SUM4(b_ptr + j, 4);
778     } else {
779       r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride);
780       g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride);
781       b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride);
782     }
783     dst[0] = r;
784     dst[1] = g;
785     dst[2] = b;
786     dst[3] = a;
787   }
788   if (width & 1) {
789     const uint32_t a = 2u * SUM2ALPHA(a_ptr + j);
790     int r, g, b;
791     if (a == 4 * 0xff || a == 0) {
792       r = SUM2(r_ptr + j);
793       g = SUM2(g_ptr + j);
794       b = SUM2(b_ptr + j);
795     } else {
796       r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride);
797       g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride);
798       b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride);
799     }
800     dst[0] = r;
801     dst[1] = g;
802     dst[2] = b;
803     dst[3] = a;
804   }
805 }
806 
AccumulateRGB(const uint8_t * const r_ptr,const uint8_t * const g_ptr,const uint8_t * const b_ptr,int step,int rgb_stride,uint16_t * dst,int width)807 static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
808                                       const uint8_t* const g_ptr,
809                                       const uint8_t* const b_ptr,
810                                       int step, int rgb_stride,
811                                       uint16_t* dst, int width) {
812   int i, j;
813   for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) {
814     dst[0] = SUM4(r_ptr + j, step);
815     dst[1] = SUM4(g_ptr + j, step);
816     dst[2] = SUM4(b_ptr + j, step);
817   }
818   if (width & 1) {
819     dst[0] = SUM2(r_ptr + j);
820     dst[1] = SUM2(g_ptr + j);
821     dst[2] = SUM2(b_ptr + j);
822   }
823 }
824 
ConvertRowsToUV(const uint16_t * rgb,uint8_t * const dst_u,uint8_t * const dst_v,int width,VP8Random * const rg)825 static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
826                                         uint8_t* const dst_u,
827                                         uint8_t* const dst_v,
828                                         int width,
829                                         VP8Random* const rg) {
830   int i;
831   for (i = 0; i < width; i += 1, rgb += 4) {
832     const int r = rgb[0], g = rgb[1], b = rgb[2];
833     dst_u[i] = RGBToU(r, g, b, rg);
834     dst_v[i] = RGBToV(r, g, b, rg);
835   }
836 }
837 
// Converts the interleaved samples addressed by r_ptr/g_ptr/b_ptr (and a_ptr,
// if any) into the Y, U, V (and A) planes of 'picture'.
//   r_ptr, g_ptr, b_ptr: first sample of each color channel. The layout is
//                        considered RGB if r_ptr < b_ptr, BGR otherwise.
//   a_ptr: first alpha sample, or NULL if there is no alpha channel.
//   step: distance in bytes between two consecutive pixels.
//   rgb_stride: distance in bytes between two consecutive scanlines.
//   dithering: if > 0, a VP8Random generator is initialized with this value
//              and the non-DSP conversion functions are used.
//   use_iterative_conversion: if non-zero, PreprocessARGB() is used instead
//              of the 2x2 averaging below. It is ignored for pictures smaller
//              than kMinDimensionIterativeConversion in either dimension.
// The planes are allocated with WebPPictureAllocYUVA() using picture->width
// and picture->height; picture->colorspace and picture->use_argb are updated.
// Returns 1 on success, 0 on failure. If the temporary buffer can't be
// allocated, VP8_ENC_ERROR_OUT_OF_MEMORY is recorded in 'picture'.
static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
                              const uint8_t* g_ptr,
                              const uint8_t* b_ptr,
                              const uint8_t* a_ptr,
                              int step,         // bytes per pixel
                              int rgb_stride,   // bytes per scanline
                              float dithering,
                              int use_iterative_conversion,
                              WebPPicture* const picture) {
  int y;
  const int width = picture->width;
  const int height = picture->height;
  const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
  const int is_rgb = (r_ptr < b_ptr);  // otherwise it's bgr

  picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
  picture->use_argb = 0;

  // disable smart conversion if source is too small (overkill).
  if (width < kMinDimensionIterativeConversion ||
      height < kMinDimensionIterativeConversion) {
    use_iterative_conversion = 0;
  }

  if (!WebPPictureAllocYUVA(picture, width, height)) {
    return 0;
  }
  if (has_alpha) {
    assert(step == 4);
#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
    assert(kAlphaFix + kGammaFix <= 31);
#endif
  }

  if (use_iterative_conversion) {
    InitGammaTablesS();
    if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
      return 0;
    }
    if (has_alpha) {
      WebPExtractAlpha(a_ptr, rgb_stride, width, height,
                       picture->a, picture->a_stride);
    }
  } else {
    const int uv_width = (width + 1) >> 1;
    int use_dsp = (step == 3);  // use special function in this case
    // temporary storage for accumulated R/G/B values during conversion to U/V
    uint16_t* const tmp_rgb =
        (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb));
    uint8_t* dst_y = picture->y;
    uint8_t* dst_u = picture->u;
    uint8_t* dst_v = picture->v;
    uint8_t* dst_a = picture->a;

    VP8Random base_rg;
    VP8Random* rg = NULL;
    if (dithering > 0.) {
      VP8InitRandom(&base_rg, dithering);
      rg = &base_rg;
      use_dsp = 0;   // can't use dsp in this case
    }
    WebPInitConvertARGBToYUV();
    InitGammaTables();

    if (tmp_rgb == NULL) {
      // Report the failure through the picture's error code, instead of
      // silently returning 0 with the error code left untouched.
      return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
    }

    // Downsample Y/U/V planes, two rows at a time
    for (y = 0; y < (height >> 1); ++y) {
      int rows_have_alpha = has_alpha;
      if (use_dsp) {
        if (is_rgb) {
          WebPConvertRGB24ToY(r_ptr, dst_y, width);
          WebPConvertRGB24ToY(r_ptr + rgb_stride,
                              dst_y + picture->y_stride, width);
        } else {
          WebPConvertBGR24ToY(b_ptr, dst_y, width);
          WebPConvertBGR24ToY(b_ptr + rgb_stride,
                              dst_y + picture->y_stride, width);
        }
      } else {
        ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
        ConvertRowToY(r_ptr + rgb_stride,
                      g_ptr + rgb_stride,
                      b_ptr + rgb_stride, step,
                      dst_y + picture->y_stride, width, rg);
      }
      dst_y += 2 * picture->y_stride;
      if (has_alpha) {
        // Copy the alpha rows; the return value of WebPExtractAlpha() decides
        // whether the alpha-weighted accumulation is needed for these rows.
        rows_have_alpha &= !WebPExtractAlpha(a_ptr, rgb_stride, width, 2,
                                             dst_a, picture->a_stride);
        dst_a += 2 * picture->a_stride;
      }
      // Collect averaged R/G/B(/A)
      if (!rows_have_alpha) {
        AccumulateRGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, tmp_rgb, width);
      } else {
        AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, rgb_stride, tmp_rgb, width);
      }
      // Convert to U/V
      if (rg == NULL) {
        WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
      } else {
        ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
      }
      dst_u += picture->uv_stride;
      dst_v += picture->uv_stride;
      r_ptr += 2 * rgb_stride;
      b_ptr += 2 * rgb_stride;
      g_ptr += 2 * rgb_stride;
      if (has_alpha) a_ptr += 2 * rgb_stride;
    }
    if (height & 1) {    // extra last row
      int row_has_alpha = has_alpha;
      if (use_dsp) {
        // All channel pointers advance in lockstep, so 'is_rgb' still holds.
        if (is_rgb) {
          WebPConvertRGB24ToY(r_ptr, dst_y, width);
        } else {
          WebPConvertBGR24ToY(b_ptr, dst_y, width);
        }
      } else {
        ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
      }
      if (row_has_alpha) {
        row_has_alpha &= !WebPExtractAlpha(a_ptr, 0, width, 1, dst_a, 0);
      }
      // Collect averaged R/G/B(/A). A stride of 0 makes the accumulators use
      // the single remaining row twice.
      if (!row_has_alpha) {
        // Collect averaged R/G/B
        AccumulateRGB(r_ptr, g_ptr, b_ptr, step, /* rgb_stride = */ 0,
                      tmp_rgb, width);
      } else {
        AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /* rgb_stride = */ 0,
                       tmp_rgb, width);
      }
      if (rg == NULL) {
        WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
      } else {
        ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
      }
    }
    WebPSafeFree(tmp_rgb);
  }
  return 1;
}
982 
983 #undef SUM4
984 #undef SUM2
985 #undef SUM4ALPHA
986 #undef SUM2ALPHA
987 
988 //------------------------------------------------------------------------------
989 // call for ARGB->YUVA conversion
990 
// Converts picture->argb into YUV420(A) planes, using ImportYUVAFromRGBA().
// Returns 0 if 'picture' is NULL. Records VP8_ENC_ERROR_NULL_PARAMETER if
// picture->argb is NULL, and VP8_ENC_ERROR_INVALID_CONFIGURATION if the
// chroma layout requested by 'colorspace' is not WEBP_YUV420.
// Otherwise, returns the result of ImportYUVAFromRGBA().
static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace,
                             float dithering, int use_iterative_conversion) {
  const uint8_t* argb_bytes;

  if (picture == NULL) return 0;
  if (picture->argb == NULL) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
  }
  if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }

  // Each channel is addressed byte-wise within the 32-bit ARGB words;
  // ALPHA_OFFSET accounts for the endianness.
  argb_bytes = (const uint8_t*)picture->argb;
  picture->colorspace = WEBP_YUV420;
  return ImportYUVAFromRGBA(argb_bytes + (1 ^ ALPHA_OFFSET),   // r
                            argb_bytes + (2 ^ ALPHA_OFFSET),   // g
                            argb_bytes + (3 ^ ALPHA_OFFSET),   // b
                            argb_bytes + (0 ^ ALPHA_OFFSET),   // a
                            4, 4 * picture->argb_stride,
                            dithering, use_iterative_conversion, picture);
}
1010 
// ARGB -> YUVA conversion with the given 'dithering' value and without the
// iterative conversion. See PictureARGBToYUVA() for the return value.
int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
                                  float dithering) {
  const int use_iterative_conversion = 0;
  return PictureARGBToYUVA(picture, colorspace, dithering,
                           use_iterative_conversion);
}
1015 
// ARGB -> YUVA conversion without dithering and without the iterative
// conversion. See PictureARGBToYUVA() for the return value.
int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
  const float no_dithering = 0.f;
  const int use_iterative_conversion = 0;
  return PictureARGBToYUVA(picture, colorspace, no_dithering,
                           use_iterative_conversion);
}
1019 
// ARGB -> YUV420(A) conversion with the iterative conversion requested and no
// dithering. See PictureARGBToYUVA() for the return value.
int WebPPictureSharpARGBToYUVA(WebPPicture* picture) {
  const float no_dithering = 0.f;
  const int use_iterative_conversion = 1;
  return PictureARGBToYUVA(picture, WEBP_YUV420, no_dithering,
                           use_iterative_conversion);
}
1023 // for backward compatibility
WebPPictureSmartARGBToYUVA(WebPPicture * picture)1024 int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
1025   return WebPPictureSharpARGBToYUVA(picture);
1026 }
1027 
1028 //------------------------------------------------------------------------------
1029 // call for YUVA -> ARGB conversion
1030 
// Converts the Y/U/V(/A) planes of 'picture' into a newly allocated ARGB
// buffer and sets picture->use_argb to 1.
// Returns 0 if 'picture' is NULL or if WebPPictureAllocARGB() fails.
// Records VP8_ENC_ERROR_NULL_PARAMETER if a required plane is missing (the
// alpha plane is required when the colorspace has WEBP_CSP_ALPHA_BIT set) and
// VP8_ENC_ERROR_INVALID_CONFIGURATION if the chroma layout is not WEBP_YUV420.
// Returns 1 on success.
int WebPPictureYUVAToARGB(WebPPicture* picture) {
  if (picture == NULL) return 0;
  if (picture->y == NULL || picture->u == NULL || picture->v == NULL ||
      ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL)) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
  }
  if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }
  // Allocate a new argb buffer (discarding the previous one).
  if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
  picture->use_argb = 1;

  // Convert
  {
    const int width = picture->width;
    const int height = picture->height;
    const int y_stride = picture->y_stride;
    const int uv_stride = picture->uv_stride;
    const int dst_stride = 4 * picture->argb_stride;   // in bytes
    const uint8_t* luma = picture->y;
    const uint8_t* u_row = picture->u;
    const uint8_t* v_row = picture->v;
    uint8_t* out = (uint8_t*)picture->argb;
    WebPUpsampleLinePairFunc upsample =
        WebPGetLinePairConverter(ALPHA_OFFSET > 0);
    int row;

    // First row, with replicated top samples.
    upsample(luma, NULL, u_row, v_row, u_row, v_row, out, NULL, width);
    luma += y_stride;
    out += dst_stride;

    // Center rows, processed by pairs. Each pair is interpolated between the
    // previous chroma row and the next one.
    row = 1;
    while (row + 1 < height) {
      const uint8_t* const prev_u = u_row;
      const uint8_t* const prev_v = v_row;
      u_row += uv_stride;
      v_row += uv_stride;
      upsample(luma, luma + y_stride, prev_u, prev_v, u_row, v_row,
               out, out + dst_stride, width);
      luma += 2 * y_stride;
      out += 2 * dst_stride;
      row += 2;
    }

    // Last row (if needed), with replicated bottom samples.
    if ((height & 1) == 0 && height > 1) {
      upsample(luma, NULL, u_row, v_row, u_row, v_row, out, NULL, width);
    }

    // Insert alpha values if needed, in replacement for the default 0xff ones.
    if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
      for (row = 0; row < height; ++row) {
        uint32_t* const pixels = picture->argb + row * picture->argb_stride;
        const uint8_t* const alpha = picture->a + row * picture->a_stride;
        int col;
        for (col = 0; col < width; ++col) {
          const uint32_t color = pixels[col] & 0x00ffffffu;
          pixels[col] = color | ((uint32_t)alpha[col] << 24);
        }
      }
    }
  }
  return 1;
}
1090 
1091 //------------------------------------------------------------------------------
1092 // automatic import / conversion
1093 
// Imports the interleaved samples pointed to by 'rgb' into 'picture'.
//   rgb_stride: distance in bytes between two consecutive scanlines.
//   step: distance in bytes between two consecutive pixels.
//   swap_rb: if non-zero the input order is b,g,r(,a), otherwise r,g,b(,a).
//   import_alpha: if non-zero, the fourth byte of each pixel is imported as
//                 alpha (requires step == 4).
// If picture->use_argb is zero, the samples are converted to YUV(A) by
// ImportYUVAFromRGBA(). Otherwise they are packed into picture->argb, after
// allocating the picture with WebPPictureAlloc().
// Returns 1 on success, 0 on failure.
static int Import(WebPPicture* const picture,
                  const uint8_t* rgb, int rgb_stride,
                  int step, int swap_rb, int import_alpha) {
  // swap_rb -> b,g,r,a , !swap_rb -> r,g,b,a
  const int r_offset = swap_rb ? 2 : 0;
  const int b_offset = swap_rb ? 0 : 2;
  const uint8_t* r_ptr = rgb + r_offset;
  const uint8_t* g_ptr = rgb + 1;
  const uint8_t* b_ptr = rgb + b_offset;
  const int width = picture->width;
  const int height = picture->height;
  uint32_t* dst;
  int y;

  if (!picture->use_argb) {
    return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr,
                              import_alpha ? rgb + 3 : NULL,
                              step, rgb_stride,
                              0.f /* no dithering */, 0, picture);
  }
  if (!WebPPictureAlloc(picture)) return 0;

  VP8LDspInit();
  WebPInitAlphaProcessing();

  dst = picture->argb;

  if (!import_alpha) {
    // No alpha to import: pack the three color channels row by row.
    assert(step >= 3);
    for (y = 0; y < height; ++y) {
      WebPPackRGB(r_ptr, g_ptr, b_ptr, width, step, dst);
      r_ptr += rgb_stride;
      g_ptr += rgb_stride;
      b_ptr += rgb_stride;
      dst += picture->argb_stride;
    }
    return 1;
  }

  // dst[] byte order is {a,r,g,b} for big-endian, {b,g,r,a} for little endian
  assert(step == 4);
  if ((ALPHA_OFFSET == 3) && swap_rb) {
    // The input bytes are already in the destination order: plain row copy.
    for (y = 0; y < height; ++y) {
      memcpy(dst, rgb, width * 4);
      rgb += rgb_stride;
      dst += picture->argb_stride;
    }
    return 1;
  }

  for (y = 0; y < height; ++y) {
#ifdef WORDS_BIGENDIAN
    // BGRA or RGBA input order.
    const uint8_t* a_ptr = rgb + 3;
    WebPPackARGB(a_ptr, r_ptr, g_ptr, b_ptr, width, dst);
    r_ptr += rgb_stride;
    g_ptr += rgb_stride;
    b_ptr += rgb_stride;
#else
    // RGBA input order. Need to swap R and B.
    VP8LConvertBGRAToRGBA((const uint32_t*)rgb, width, (uint8_t*)dst);
#endif
    rgb += rgb_stride;
    dst += picture->argb_stride;
  }
  return 1;
}
1156 
1157 // Public API
1158 
1159 #if !defined(WEBP_REDUCE_CSP)
1160 
// Imports 3-bytes-per-pixel samples in b,g,r order (no alpha).
// Returns 0 if 'picture' or 'rgb' is NULL, the result of Import() otherwise.
int WebPPictureImportBGR(WebPPicture* picture,
                         const uint8_t* rgb, int rgb_stride) {
  if (picture == NULL || rgb == NULL) return 0;
  return Import(picture, rgb, rgb_stride, 3, 1, 0);
}
1167 
// Imports 4-bytes-per-pixel samples in b,g,r,a order, alpha included.
// Returns 0 if 'picture' or 'rgba' is NULL, the result of Import() otherwise.
int WebPPictureImportBGRA(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  if (picture == NULL || rgba == NULL) return 0;
  return Import(picture, rgba, rgba_stride, 4, 1, 1);
}
1174 
1175 
// Imports 4-bytes-per-pixel samples in b,g,r,x order; the fourth byte is not
// imported as alpha.
// Returns 0 if 'picture' or 'rgba' is NULL, the result of Import() otherwise.
int WebPPictureImportBGRX(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  if (picture == NULL || rgba == NULL) return 0;
  return Import(picture, rgba, rgba_stride, 4, 1, 0);
}
1182 
1183 #endif   // WEBP_REDUCE_CSP
1184 
// Imports 3-bytes-per-pixel samples in r,g,b order (no alpha).
// Returns 0 if 'picture' or 'rgb' is NULL, the result of Import() otherwise.
int WebPPictureImportRGB(WebPPicture* picture,
                         const uint8_t* rgb, int rgb_stride) {
  if (picture == NULL || rgb == NULL) return 0;
  return Import(picture, rgb, rgb_stride, 3, 0, 0);
}
1191 
// Imports 4-bytes-per-pixel samples in r,g,b,a order, alpha included.
// Returns 0 if 'picture' or 'rgba' is NULL, the result of Import() otherwise.
int WebPPictureImportRGBA(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  if (picture == NULL || rgba == NULL) return 0;
  return Import(picture, rgba, rgba_stride, 4, 0, 1);
}
1198 
// Imports 4-bytes-per-pixel samples in r,g,b,x order; the fourth byte is not
// imported as alpha.
// Returns 0 if 'picture' or 'rgba' is NULL, the result of Import() otherwise.
int WebPPictureImportRGBX(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  if (picture == NULL || rgba == NULL) return 0;
  return Import(picture, rgba, rgba_stride, 4, 0, 0);
}
1205 
1206 //------------------------------------------------------------------------------
1207