1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Image transforms and color space conversion methods for lossless decoder.
11 //
12 // Authors: Vikas Arora (vikaas.arora@gmail.com)
13 //          Jyrki Alakuijala (jyrki@google.com)
14 //          Urvang Joshi (urvang@google.com)
15 
16 #include "./dsp.h"
17 
18 #include <math.h>
19 #include <stdlib.h>
20 #include "../dec/vp8li.h"
21 #include "../utils/endian_inl.h"
22 #include "./lossless.h"
23 
24 #define MAX_DIFF_COST (1e30f)
25 
26 //------------------------------------------------------------------------------
27 // Image transforms.
28 
// Adds pixel 'b' into '*a' channel by channel. Each 8-bit channel wraps
// modulo 256 and no carry leaks into the neighbouring channel.
static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
  const uint32_t kAlphaGreenMask = 0xff00ff00u;
  const uint32_t kRedBlueMask = 0x00ff00ffu;
  const uint32_t current = *a;
  // Two channels are summed at a time; any carry lands in the masked-out bits
  // (or falls off the top of the word) and is discarded by the final masks.
  const uint32_t ag_sum = (current & kAlphaGreenMask) + (b & kAlphaGreenMask);
  const uint32_t rb_sum = (current & kRedBlueMask) + (b & kRedBlueMask);
  *a = (ag_sum & kAlphaGreenMask) | (rb_sum & kRedBlueMask);
}
35 
// Per-channel average (rounded down) of two packed pixels, computed on all
// four 8-bit channels at once.
static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
  // a + b == 2 * (a & b) + (a ^ b). The differing bits are halved after
  // clearing each channel's lowest bit, so the shift cannot move a bit into
  // the channel below.
  const uint32_t shared_bits = a0 & a1;
  const uint32_t half_of_diff = ((a0 ^ a1) & 0xfefefefeu) >> 1;
  return half_of_diff + shared_bits;
}
39 
Average3(uint32_t a0,uint32_t a1,uint32_t a2)40 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
41   return Average2(Average2(a0, a2), a1);
42 }
43 
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)44 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
45                                      uint32_t a2, uint32_t a3) {
46   return Average2(Average2(a0, a1), Average2(a2, a3));
47 }
48 
// Clamps a value to [0, 255]. 'a' is a signed quantity stored in an unsigned
// word: values below 256 are returned unchanged. Otherwise the top byte of
// '~a' is returned, which is 0 for a wrapped-around negative (top byte 0xff)
// and 255 for a positive overshoot whose top byte is 0x00.
static WEBP_INLINE uint32_t Clip255(uint32_t a) {
  return (a >= 256) ? (~a >> 24) : a;
}
57 
AddSubtractComponentFull(int a,int b,int c)58 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
59   return Clip255(a + b - c);
60 }
61 
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)62 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
63                                                    uint32_t c2) {
64   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
65   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
66                                          (c1 >> 16) & 0xff,
67                                          (c2 >> 16) & 0xff);
68   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
69                                          (c1 >> 8) & 0xff,
70                                          (c2 >> 8) & 0xff);
71   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
72   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
73 }
74 
AddSubtractComponentHalf(int a,int b)75 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
76   return Clip255(a + (a - b) / 2);
77 }
78 
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)79 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
80                                                    uint32_t c2) {
81   const uint32_t ave = Average2(c0, c1);
82   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
83   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
84   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
85   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
86   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
87 }
88 
// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined,
// so inlining is explicitly disabled for that one toolchain.
#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
# define LOCAL_INLINE __attribute__ ((noinline))
#else
# define LOCAL_INLINE WEBP_INLINE
#endif

// Returns |b - c| - |a - c|: a result <= 0 means 'b' is at least as close to
// 'c' as 'a' is.
static LOCAL_INLINE int Sub3(int a, int b, int c) {
  const int dist_b = abs(b - c);
  const int dist_a = abs(a - c);
  return dist_b - dist_a;
}

#undef LOCAL_INLINE
103 
Select(uint32_t a,uint32_t b,uint32_t c)104 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
105   const int pa_minus_pb =
106       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
107       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
108       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
109       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
110   return (pa_minus_pb <= 0) ? a : b;
111 }
112 
113 //------------------------------------------------------------------------------
114 // Predictors
115 
Predictor0(uint32_t left,const uint32_t * const top)116 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
117   (void)top;
118   (void)left;
119   return ARGB_BLACK;
120 }
// Mode 1: predicts the left neighbour. 'top' is never dereferenced.
static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = left;
  (void)top;
  return pred;
}
// Mode 2: predicts the pixel directly above (top[0]). 'left' is ignored.
static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = top[0];
  (void)left;
  return pred;
}
// Mode 3: predicts the pixel above and one to the right (top[1]). 'left' is
// ignored.
static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = top[1];
  (void)left;
  return pred;
}
// Mode 4: predicts the pixel above and one to the left (top[-1]). 'left' is
// ignored.
static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = top[-1];
  (void)left;
  return pred;
}
// Mode 5: Average3() of the left, top (top[0]) and top-right (top[1]) pixels.
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
  return Average3(left, top[0], top[1]);
}
// Mode 6: Average2() of the left and top-left (top[-1]) pixels.
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
  return Average2(left, top[-1]);
}
// Mode 7: Average2() of the left and top (top[0]) pixels.
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
  return Average2(left, top[0]);
}
// Mode 8: Average2() of the top-left (top[-1]) and top (top[0]) pixels.
// 'left' is ignored.
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
  (void)left;
  return Average2(top[-1], top[0]);
}
// Mode 9: Average2() of the top (top[0]) and top-right (top[1]) pixels.
// 'left' is ignored.
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
  (void)left;
  return Average2(top[0], top[1]);
}
// Mode 10: Average4() of the left, top-left, top and top-right pixels.
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
  return Average4(left, top[-1], top[0], top[1]);
}
// Mode 11: Select() between the top pixel (first candidate) and the left pixel
// (second candidate), using the top-left pixel as the reference.
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
  return Select(top[0], left, top[-1]);
}
// Mode 12: per-channel clamped (left + top - top_left), via
// ClampedAddSubtractFull().
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
  return ClampedAddSubtractFull(left, top[0], top[-1]);
}
// Mode 13: ClampedAddSubtractHalf() applied to the left, top and top-left
// pixels (in that argument order).
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  return ClampedAddSubtractHalf(left, top[0], top[-1]);
}
175 
176 //------------------------------------------------------------------------------
177 
178 // Inverse prediction.
PredictorInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)179 static void PredictorInverseTransform(const VP8LTransform* const transform,
180                                       int y_start, int y_end, uint32_t* data) {
181   const int width = transform->xsize_;
182   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
183     int x;
184     const uint32_t pred0 = Predictor0(data[-1], NULL);
185     AddPixelsEq(data, pred0);
186     for (x = 1; x < width; ++x) {
187       const uint32_t pred1 = Predictor1(data[x - 1], NULL);
188       AddPixelsEq(data + x, pred1);
189     }
190     data += width;
191     ++y_start;
192   }
193 
194   {
195     int y = y_start;
196     const int tile_width = 1 << transform->bits_;
197     const int mask = tile_width - 1;
198     const int safe_width = width & ~mask;
199     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
200     const uint32_t* pred_mode_base =
201         transform->data_ + (y >> transform->bits_) * tiles_per_row;
202 
203     while (y < y_end) {
204       const uint32_t pred2 = Predictor2(data[-1], data - width);
205       const uint32_t* pred_mode_src = pred_mode_base;
206       VP8LPredictorFunc pred_func;
207       int x = 1;
208       int t = 1;
209       // First pixel follows the T (mode=2) mode.
210       AddPixelsEq(data, pred2);
211       // .. the rest:
212       while (x < safe_width) {
213         pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
214         for (; t < tile_width; ++t, ++x) {
215           const uint32_t pred = pred_func(data[x - 1], data + x - width);
216           AddPixelsEq(data + x, pred);
217         }
218         t = 0;
219       }
220       if (x < width) {
221         pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
222         for (; x < width; ++x) {
223           const uint32_t pred = pred_func(data[x - 1], data + x - width);
224           AddPixelsEq(data + x, pred);
225         }
226       }
227       data += width;
228       ++y;
229       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
230         pred_mode_base += tiles_per_row;
231       }
232     }
233   }
234 }
235 
// Inverse of the 'subtract green' transform: for each of the 'num_pixels'
// pixels in 'data', adds the green channel to the red and blue channels
// (modulo 256). Alpha and green are left untouched.
void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t pixel = data[i];
    const uint32_t g = (pixel >> 8) & 0xff;
    // Replicate green into the red and blue byte positions, add, and drop the
    // carries that spilled into the alpha/green positions.
    const uint32_t rb = ((pixel & 0x00ff00ffu) + (g * 0x00010001u)) & 0x00ff00ffu;
    data[i] = (pixel & 0xff00ff00u) | rb;
  }
}
249 
// Multiplies the two signed 8-bit operands and returns the product, converted
// to unsigned, shifted right by 5 bits. Callers in this file keep only the low
// 8 bits of the running sum the result is added to.
static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
                                                int8_t color) {
  const int product = (int)color_pred * (int)color;
  return (uint32_t)product >> 5;
}
254 
ColorCodeToMultipliers(uint32_t color_code,VP8LMultipliers * const m)255 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
256                                                VP8LMultipliers* const m) {
257   m->green_to_red_  = (color_code >>  0) & 0xff;
258   m->green_to_blue_ = (color_code >>  8) & 0xff;
259   m->red_to_blue_   = (color_code >> 16) & 0xff;
260 }
261 
VP8LTransformColorInverse_C(const VP8LMultipliers * const m,uint32_t * data,int num_pixels)262 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
263                                  int num_pixels) {
264   int i;
265   for (i = 0; i < num_pixels; ++i) {
266     const uint32_t argb = data[i];
267     const uint32_t green = argb >> 8;
268     const uint32_t red = argb >> 16;
269     uint32_t new_red = red;
270     uint32_t new_blue = argb;
271     new_red += ColorTransformDelta(m->green_to_red_, green);
272     new_red &= 0xff;
273     new_blue += ColorTransformDelta(m->green_to_blue_, green);
274     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
275     new_blue &= 0xff;
276     data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
277   }
278 }
279 
280 // Color space inverse transform.
ColorSpaceInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)281 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
282                                        int y_start, int y_end, uint32_t* data) {
283   const int width = transform->xsize_;
284   const int tile_width = 1 << transform->bits_;
285   const int mask = tile_width - 1;
286   const int safe_width = width & ~mask;
287   const int remaining_width = width - safe_width;
288   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
289   int y = y_start;
290   const uint32_t* pred_row =
291       transform->data_ + (y >> transform->bits_) * tiles_per_row;
292 
293   while (y < y_end) {
294     const uint32_t* pred = pred_row;
295     VP8LMultipliers m = { 0, 0, 0 };
296     const uint32_t* const data_safe_end = data + safe_width;
297     const uint32_t* const data_end = data + width;
298     while (data < data_safe_end) {
299       ColorCodeToMultipliers(*pred++, &m);
300       VP8LTransformColorInverse(&m, data, tile_width);
301       data += tile_width;
302     }
303     if (data < data_end) {  // Left-overs using C-version.
304       ColorCodeToMultipliers(*pred++, &m);
305       VP8LTransformColorInverse(&m, data, remaining_width);
306       data += remaining_width;
307     }
308     ++y;
309     if ((y & mask) == 0) pred_row += tiles_per_row;
310   }
311 }
312 
313 // Separate out pixels packed together using pixel-bundling.
314 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
315 #define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
316                             GET_INDEX, GET_VALUE)                              \
317 static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
318                    TYPE* dst, int y_start, int y_end, int width) {             \
319   int y;                                                                       \
320   for (y = y_start; y < y_end; ++y) {                                          \
321     int x;                                                                     \
322     for (x = 0; x < width; ++x) {                                              \
323       *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
324     }                                                                          \
325   }                                                                            \
326 }                                                                              \
327 STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
328                            int y_start, int y_end, const TYPE* src,            \
329                            TYPE* dst) {                                        \
330   int y;                                                                       \
331   const int bits_per_pixel = 8 >> transform->bits_;                            \
332   const int width = transform->xsize_;                                         \
333   const uint32_t* const color_map = transform->data_;                          \
334   if (bits_per_pixel < 8) {                                                    \
335     const int pixels_per_byte = 1 << transform->bits_;                         \
336     const int count_mask = pixels_per_byte - 1;                                \
337     const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
338     for (y = y_start; y < y_end; ++y) {                                        \
339       uint32_t packed_pixels = 0;                                              \
340       int x;                                                                   \
341       for (x = 0; x < width; ++x) {                                            \
342         /* We need to load fresh 'packed_pixels' once every                */  \
343         /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
344         /* is a power of 2, so can just use a mask for that, instead of    */  \
345         /* decrementing a counter.                                         */  \
346         if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
347         *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
348         packed_pixels >>= bits_per_pixel;                                      \
349       }                                                                        \
350     }                                                                          \
351   } else {                                                                     \
352     VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
353   }                                                                            \
354 }
355 
356 COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
357                     VP8GetARGBIndex, VP8GetARGBValue)
358 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
359                     8b, VP8GetAlphaIndex, VP8GetAlphaValue)
360 
361 #undef COLOR_INDEX_INVERSE
362 
VP8LInverseTransform(const VP8LTransform * const transform,int row_start,int row_end,const uint32_t * const in,uint32_t * const out)363 void VP8LInverseTransform(const VP8LTransform* const transform,
364                           int row_start, int row_end,
365                           const uint32_t* const in, uint32_t* const out) {
366   const int width = transform->xsize_;
367   assert(row_start < row_end);
368   assert(row_end <= transform->ysize_);
369   switch (transform->type_) {
370     case SUBTRACT_GREEN:
371       VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
372       break;
373     case PREDICTOR_TRANSFORM:
374       PredictorInverseTransform(transform, row_start, row_end, out);
375       if (row_end != transform->ysize_) {
376         // The last predicted row in this iteration will be the top-pred row
377         // for the first row in next iteration.
378         memcpy(out - width, out + (row_end - row_start - 1) * width,
379                width * sizeof(*out));
380       }
381       break;
382     case CROSS_COLOR_TRANSFORM:
383       ColorSpaceInverseTransform(transform, row_start, row_end, out);
384       break;
385     case COLOR_INDEXING_TRANSFORM:
386       if (in == out && transform->bits_ > 0) {
387         // Move packed pixels to the end of unpacked region, so that unpacking
388         // can occur seamlessly.
389         // Also, note that this is the only transform that applies on
390         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
391         // transforms work on effective width of xsize_.
392         const int out_stride = (row_end - row_start) * width;
393         const int in_stride = (row_end - row_start) *
394             VP8LSubSampleSize(transform->xsize_, transform->bits_);
395         uint32_t* const src = out + out_stride - in_stride;
396         memmove(src, out, in_stride * sizeof(*src));
397         ColorIndexInverseTransform(transform, row_start, row_end, src, out);
398       } else {
399         ColorIndexInverseTransform(transform, row_start, row_end, in, out);
400       }
401       break;
402   }
403 }
404 
405 //------------------------------------------------------------------------------
406 // Color space conversion.
407 
// Run-time byte-order probe: returns non-zero when the first byte in memory of
// the 16-bit value 1 is not 1, i.e. when the low-order byte is not stored
// first.
static int is_big_endian(void) {
  static const uint16_t kProbe = 1;
  const uint8_t* const first_byte = (const uint8_t*)&kProbe;
  return (*first_byte != 1);
}
415 
// Converts 'num_pixels' packed pixels (alpha in bits 24..31, red in 16..23,
// green in 8..15, blue in 0..7) into 3 bytes per pixel written to 'dst' in
// R, G, B order. Alpha is dropped.
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
                            int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 3) {
    const uint32_t argb = src[i];
    dst[0] = (argb >> 16) & 0xff;
    dst[1] = (argb >>  8) & 0xff;
    dst[2] = (argb >>  0) & 0xff;
  }
}
426 
// Converts 'num_pixels' packed pixels (alpha in bits 24..31, red in 16..23,
// green in 8..15, blue in 0..7) into 4 bytes per pixel written to 'dst' in
// R, G, B, A order.
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 4) {
    const uint32_t argb = src[i];
    dst[0] = (argb >> 16) & 0xff;
    dst[1] = (argb >>  8) & 0xff;
    dst[2] = (argb >>  0) & 0xff;
    dst[3] = (argb >> 24) & 0xff;
  }
}
438 
// Converts 'num_pixels' packed pixels into 2 bytes per pixel, keeping the top
// 4 bits of each channel: one byte holds red (high nibble) and green (low
// nibble), the other holds blue (high nibble) and alpha (low nibble).
// The red/green byte is written first unless WEBP_SWAP_16BIT_CSP is defined,
// in which case the two bytes are swapped.
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
                                 int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 2) {
    const uint32_t argb = src[i];
    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
#ifdef WEBP_SWAP_16BIT_CSP
    dst[0] = ba;
    dst[1] = rg;
#else
    dst[0] = rg;
    dst[1] = ba;
#endif
  }
}
455 
// Converts 'num_pixels' packed pixels into 2 bytes per pixel holding the top
// 5 bits of red, top 6 bits of green and top 5 bits of blue. One byte carries
// red plus the upper three green bits, the other carries the lower three
// green bits plus blue. The red-carrying byte is written first unless
// WEBP_SWAP_16BIT_CSP is defined, in which case the two bytes are swapped.
// Alpha is dropped.
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
                               int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 2) {
    const uint32_t argb = src[i];
    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
#ifdef WEBP_SWAP_16BIT_CSP
    dst[0] = gb;
    dst[1] = rg;
#else
    dst[0] = rg;
    dst[1] = gb;
#endif
  }
}
472 
// Converts 'num_pixels' packed pixels (alpha in bits 24..31, red in 16..23,
// green in 8..15, blue in 0..7) into 3 bytes per pixel written to 'dst' in
// B, G, R order. Alpha is dropped.
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
                            int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 3) {
    const uint32_t argb = src[i];
    dst[0] = (argb >>  0) & 0xff;
    dst[1] = (argb >>  8) & 0xff;
    dst[2] = (argb >> 16) & 0xff;
  }
}
483 
// Writes 'num_pixels' 32-bit pixels from 'src' to the byte buffer 'dst'.
// When the run-time byte order reported by is_big_endian() differs from
// 'swap_on_big_endian', the pixels are copied verbatim with memcpy().
// Otherwise each pixel is stored explicitly, in the byte order selected by
// the build configuration (WORDS_BIGENDIAN / WEBP_REFERENCE_IMPLEMENTATION).
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
                       int swap_on_big_endian) {
  int i;
  if (is_big_endian() != swap_on_big_endian) {
    memcpy(dst, src, num_pixels * sizeof(*src));
    return;
  }
  for (i = 0; i < num_pixels; ++i, dst += sizeof(*src)) {
    const uint32_t argb = src[i];
#if defined(WORDS_BIGENDIAN)
    // Least significant byte first.
    dst[0] = (argb >>  0) & 0xff;
    dst[1] = (argb >>  8) & 0xff;
    dst[2] = (argb >> 16) & 0xff;
    dst[3] = (argb >> 24) & 0xff;
#elif defined(WEBP_REFERENCE_IMPLEMENTATION)
    // Most significant byte first.
    dst[0] = (argb >> 24) & 0xff;
    dst[1] = (argb >> 16) & 0xff;
    dst[2] = (argb >>  8) & 0xff;
    dst[3] = (argb >>  0) & 0xff;
#else
    WebPUint32ToMem(dst, BSwap32(argb));
#endif
  }
}
512 
// Converts 'num_pixels' pixels from 'in_data' into the byte layout requested
// by 'out_colorspace', writing the result to 'rgba'. For the premultiplied
// modes (rgbA, bgrA, Argb, rgbA_4444) the same conversion as the matching
// straight-alpha mode is done first, followed by an alpha-multiply pass over
// the output. Any other mode trips an assert.
void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
  switch (out_colorspace) {
    case MODE_RGB:
      VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
      break;
    case MODE_BGR:
      VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
      break;
    case MODE_RGB_565:
      VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
      break;
    case MODE_RGBA:
    case MODE_rgbA:
      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
      if (out_colorspace == MODE_rgbA) {
        WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
      }
      break;
    case MODE_BGRA:
    case MODE_bgrA:
      CopyOrSwap(in_data, num_pixels, rgba, 1);
      if (out_colorspace == MODE_bgrA) {
        WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
      }
      break;
    case MODE_ARGB:
    case MODE_Argb:
      CopyOrSwap(in_data, num_pixels, rgba, 0);
      if (out_colorspace == MODE_Argb) {
        // Second argument is 1 here, unlike the rgbA / bgrA calls above.
        WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
      }
      break;
    case MODE_RGBA_4444:
    case MODE_rgbA_4444:
      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
      if (out_colorspace == MODE_rgbA_4444) {
        WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
      }
      break;
    default:
      assert(0);          // Code flow should not reach here.
  }
}
557 
558 //------------------------------------------------------------------------------
559 
560 VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
561 VP8LPredictorFunc VP8LPredictors[16];
562 
563 VP8LTransformColorFunc VP8LTransformColorInverse;
564 
565 VP8LConvertFunc VP8LConvertBGRAToRGB;
566 VP8LConvertFunc VP8LConvertBGRAToRGBA;
567 VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
568 VP8LConvertFunc VP8LConvertBGRAToRGB565;
569 VP8LConvertFunc VP8LConvertBGRAToBGR;
570 
571 VP8LMapARGBFunc VP8LMapColor32b;
572 VP8LMapAlphaFunc VP8LMapColor8b;
573 
574 extern void VP8LDspInitSSE2(void);
575 extern void VP8LDspInitNEON(void);
576 extern void VP8LDspInitMIPSdspR2(void);
577 
578 static volatile VP8CPUInfo lossless_last_cpuinfo_used =
579     (VP8CPUInfo)&lossless_last_cpuinfo_used;
580 
VP8LDspInit(void)581 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
582   if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
583 
584   VP8LPredictors[0] = Predictor0;
585   VP8LPredictors[1] = Predictor1;
586   VP8LPredictors[2] = Predictor2;
587   VP8LPredictors[3] = Predictor3;
588   VP8LPredictors[4] = Predictor4;
589   VP8LPredictors[5] = Predictor5;
590   VP8LPredictors[6] = Predictor6;
591   VP8LPredictors[7] = Predictor7;
592   VP8LPredictors[8] = Predictor8;
593   VP8LPredictors[9] = Predictor9;
594   VP8LPredictors[10] = Predictor10;
595   VP8LPredictors[11] = Predictor11;
596   VP8LPredictors[12] = Predictor12;
597   VP8LPredictors[13] = Predictor13;
598   VP8LPredictors[14] = Predictor0;     // <- padding security sentinels
599   VP8LPredictors[15] = Predictor0;
600 
601   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
602 
603   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
604 
605   VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
606   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
607   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
608   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
609   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
610 
611   VP8LMapColor32b = MapARGB;
612   VP8LMapColor8b = MapAlpha;
613 
614   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
615   if (VP8GetCPUInfo != NULL) {
616 #if defined(WEBP_USE_SSE2)
617     if (VP8GetCPUInfo(kSSE2)) {
618       VP8LDspInitSSE2();
619     }
620 #endif
621 #if defined(WEBP_USE_NEON)
622     if (VP8GetCPUInfo(kNEON)) {
623       VP8LDspInitNEON();
624     }
625 #endif
626 #if defined(WEBP_USE_MIPS_DSP_R2)
627     if (VP8GetCPUInfo(kMIPSdspR2)) {
628       VP8LDspInitMIPSdspR2();
629     }
630 #endif
631   }
632   lossless_last_cpuinfo_used = VP8GetCPUInfo;
633 }
634 
635 //------------------------------------------------------------------------------
636