1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <string.h>  // For memcpy
14 
15 #include "libyuv/basic_types.h"
16 
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21 
// Converts a row of BGRA pixels (bytes in memory: A, R, G, B) to ARGB (bytes
// in memory: B, G, R, A) by reversing the four bytes of every pixel.
// A pixel is read completely before it is written, so src_bgra and dst_argb
// may be the same buffer.
void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, src_bgra += 4, dst_argb += 4) {
    const uint8 alpha = src_bgra[0];
    const uint8 red = src_bgra[1];
    const uint8 green = src_bgra[2];
    const uint8 blue = src_bgra[3];
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
  }
}
37 
// Converts a row of ABGR pixels (bytes in memory: R, G, B, A) to ARGB (bytes
// in memory: B, G, R, A) by swapping the red and blue bytes.
// A pixel is read completely before it is written, so src_abgr and dst_argb
// may be the same buffer.
void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, src_abgr += 4, dst_argb += 4) {
    const uint8 red = src_abgr[0];
    const uint8 green = src_abgr[1];
    const uint8 blue = src_abgr[2];
    const uint8 alpha = src_abgr[3];
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
  }
}
53 
// Converts a row of RGBA pixels (bytes in memory: A, B, G, R) to ARGB (bytes
// in memory: B, G, R, A) by moving the alpha byte from the front to the back.
// A pixel is read completely before it is written, so the source and
// destination may be the same buffer.
void RGBAToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, src_abgr += 4, dst_argb += 4) {
    const uint8 alpha = src_abgr[0];
    const uint8 blue = src_abgr[1];
    const uint8 green = src_abgr[2];
    const uint8 red = src_abgr[3];
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
  }
}
69 
// Expands a row of RGB24 pixels (3 bytes each: B, G, R) to ARGB (B, G, R, A),
// setting alpha to 255.
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, src_rgb24 += 3, dst_argb += 4) {
    const uint8 blue = src_rgb24[0];
    const uint8 green = src_rgb24[1];
    const uint8 red = src_rgb24[2];
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
  }
}
83 
// Expands a row of RAW pixels (3 bytes each: R, G, B) to ARGB (B, G, R, A),
// swapping red and blue and setting alpha to 255.
void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, src_raw += 3, dst_argb += 4) {
    const uint8 red = src_raw[0];
    const uint8 green = src_raw[1];
    const uint8 blue = src_raw[2];
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
  }
}
97 
// Expands a row of RGB565 pixels to ARGB with alpha set to 255.
// Each source pixel is 2 bytes, low byte first: blue in bits 0-4, green in
// bits 5-10 and red in bits 11-15. Every channel is widened to 8 bits by
// copying its top bits into the vacated low bits.
void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, src_rgb += 2, dst_argb += 4) {
    const int packed = src_rgb[0] | (src_rgb[1] << 8);
    const int blue5 = packed & 0x1f;
    const int green6 = (packed >> 5) & 0x3f;
    const int red5 = packed >> 11;
    dst_argb[0] = static_cast<uint8>((blue5 << 3) | (blue5 >> 2));
    dst_argb[1] = static_cast<uint8>((green6 << 2) | (green6 >> 4));
    dst_argb[2] = static_cast<uint8>((red5 << 3) | (red5 >> 2));
    dst_argb[3] = 255u;
  }
}
111 
// Expands a row of ARGB1555 pixels to ARGB.
// Each source pixel is 2 bytes, low byte first: blue in bits 0-4, green in
// bits 5-9, red in bits 10-14 and a 1 bit alpha in bit 15. Colour channels
// are widened by copying their top bits into the low bits; the alpha bit
// becomes 0 or 255.
void ARGB1555ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, src_rgb += 2, dst_argb += 4) {
    const int packed = src_rgb[0] | (src_rgb[1] << 8);
    const int blue5 = packed & 0x1f;
    const int green5 = (packed >> 5) & 0x1f;
    const int red5 = (packed >> 10) & 0x1f;
    const int alpha1 = packed >> 15;
    dst_argb[0] = static_cast<uint8>((blue5 << 3) | (blue5 >> 2));
    dst_argb[1] = static_cast<uint8>((green5 << 3) | (green5 >> 2));
    dst_argb[2] = static_cast<uint8>((red5 << 3) | (red5 >> 2));
    dst_argb[3] = alpha1 ? 255u : 0u;
  }
}
126 
// Expands a row of ARGB4444 pixels to ARGB.
// Each source pixel is 2 bytes: the first holds blue (low nibble) and green
// (high nibble), the second holds red (low nibble) and alpha (high nibble).
// A nibble n becomes the byte with n in both halves, which equals n * 0x11.
void ARGB4444ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, src_rgb += 2, dst_argb += 4) {
    const int blue4 = src_rgb[0] & 0x0f;
    const int green4 = src_rgb[0] >> 4;
    const int red4 = src_rgb[1] & 0x0f;
    const int alpha4 = src_rgb[1] >> 4;
    dst_argb[0] = static_cast<uint8>(blue4 * 0x11);
    dst_argb[1] = static_cast<uint8>(green4 * 0x11);
    dst_argb[2] = static_cast<uint8>(red4 * 0x11);
    dst_argb[3] = static_cast<uint8>(alpha4 * 0x11);
  }
}
141 
// Converts a row of ARGB pixels (bytes in memory: B, G, R, A) to RGBA (bytes
// in memory: A, B, G, R) by moving the alpha byte to the front.
// Each pixel is read completely before it is written.
void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  for (int i = 0; i < width; ++i, src_argb += 4, dst_rgb += 4) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    const uint8 alpha = src_argb[3];
    dst_rgb[0] = alpha;
    dst_rgb[1] = blue;
    dst_rgb[2] = green;
    dst_rgb[3] = red;
  }
}
156 
// Packs a row of ARGB pixels (B, G, R, A) into RGB24 (3 bytes each: B, G, R),
// discarding alpha.
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  for (int i = 0; i < width; ++i, src_argb += 4, dst_rgb += 3) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    dst_rgb[0] = blue;
    dst_rgb[1] = green;
    dst_rgb[2] = red;
  }
}
169 
// Packs a row of ARGB pixels (B, G, R, A) into RAW (3 bytes each: R, G, B),
// swapping red and blue and discarding alpha.
void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  for (int i = 0; i < width; ++i, src_argb += 4, dst_rgb += 3) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    dst_rgb[0] = red;
    dst_rgb[1] = green;
    dst_rgb[2] = blue;
  }
}
182 
// Packs a row of ARGB pixels (B, G, R, A) into RGB565, discarding alpha.
// Each output pixel is 16 bits: blue (top 5 bits of B) in bits 0-4, green
// (top 6 bits of G) in bits 5-10 and red (top 5 bits of R) in bits 11-15.
// The value is stored one byte at a time, low byte first, so dst_rgb needs no
// particular alignment and the output does not depend on host byte order.
// The packing is done in unsigned arithmetic so no shift reaches the sign bit.
// A pixel is read completely before its 2 output bytes are written.
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  for (int x = 0; x < width; ++x) {
    const unsigned int b = src_argb[0] >> 3;
    const unsigned int g = src_argb[1] >> 2;
    const unsigned int r = src_argb[2] >> 3;
    const unsigned int pixel = b | (g << 5) | (r << 11);
    dst_rgb[0] = static_cast<uint8>(pixel & 0xff);
    dst_rgb[1] = static_cast<uint8>(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
204 
// Packs a row of ARGB pixels (B, G, R, A) into ARGB1555.
// Each output pixel is 16 bits: the top 5 bits of blue, green and red in bits
// 0-4, 5-9 and 10-14, and the top bit of alpha in bit 15.
// The value is stored one byte at a time, low byte first, so dst_rgb needs no
// particular alignment and the output does not depend on host byte order.
// The packing is done in unsigned arithmetic so no shift reaches the sign bit.
// A pixel is read completely before its 2 output bytes are written.
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  for (int x = 0; x < width; ++x) {
    const unsigned int b = src_argb[0] >> 3;
    const unsigned int g = src_argb[1] >> 3;
    const unsigned int r = src_argb[2] >> 3;
    const unsigned int a = src_argb[3] >> 7;
    const unsigned int pixel = b | (g << 5) | (r << 10) | (a << 15);
    dst_rgb[0] = static_cast<uint8>(pixel & 0xff);
    dst_rgb[1] = static_cast<uint8>(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
230 
// Packs a row of ARGB pixels (B, G, R, A) into ARGB4444.
// Each output pixel is 16 bits holding the top 4 bits of every channel: blue
// in bits 0-3, green in bits 4-7, red in bits 8-11 and alpha in bits 12-15.
// The value is stored one byte at a time, low byte first, so dst_rgb needs no
// particular alignment and the output does not depend on host byte order.
// The packing is done in unsigned arithmetic so no shift reaches the sign bit.
// A pixel is read completely before its 2 output bytes are written.
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  for (int x = 0; x < width; ++x) {
    const unsigned int b = src_argb[0] >> 4;
    const unsigned int g = src_argb[1] >> 4;
    const unsigned int r = src_argb[2] >> 4;
    const unsigned int a = src_argb[3] >> 4;
    const unsigned int pixel = b | (g << 4) | (r << 8) | (a << 12);
    dst_rgb[0] = static_cast<uint8>(pixel & 0xff);
    dst_rgb[1] = static_cast<uint8>(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
256 
// Returns the Y value for an RGB triple using 8 bit fixed point weights
// (66, 129, 25), rounding by adding 128 before the shift, plus an offset of 16.
static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return ((weighted + 128) >> 8) + 16;
}
260 
// Returns the U value for an RGB triple using 8 bit fixed point weights
// (-38, -74, 112), rounding by adding 128 before the shift, plus an offset
// of 128.
static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
  const int weighted = -38 * r - 74 * g + 112 * b;
  return ((weighted + 128) >> 8) + 128;
}
// Returns the V value for an RGB triple using 8 bit fixed point weights
// (112, -94, -18), rounding by adding 128 before the shift, plus an offset
// of 128.
static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return ((weighted + 128) >> 8) + 128;
}
267 
268 #define MAKEROWY(NAME, R, G, B) \
269 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
270   for (int x = 0; x < width; ++x) {                                            \
271     dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
272     src_argb0 += 4;                                                            \
273     dst_y += 1;                                                                \
274   }                                                                            \
275 }                                                                              \
276 void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
277                        uint8* dst_u, uint8* dst_v, int width) {                \
278   const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
279   for (int x = 0; x < width - 1; x += 2) {                                     \
280     uint8 ab = (src_rgb0[B] + src_rgb0[B + 4] +                                \
281                src_rgb1[B] + src_rgb1[B + 4]) >> 2;                            \
282     uint8 ag = (src_rgb0[G] + src_rgb0[G + 4] +                                \
283                src_rgb1[G] + src_rgb1[G + 4]) >> 2;                            \
284     uint8 ar = (src_rgb0[R] + src_rgb0[R + 4] +                                \
285                src_rgb1[R] + src_rgb1[R + 4]) >> 2;                            \
286     dst_u[0] = RGBToU(ar, ag, ab);                                             \
287     dst_v[0] = RGBToV(ar, ag, ab);                                             \
288     src_rgb0 += 8;                                                             \
289     src_rgb1 += 8;                                                             \
290     dst_u += 1;                                                                \
291     dst_v += 1;                                                                \
292   }                                                                            \
293   if (width & 1) {                                                             \
294     uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1;                               \
295     uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1;                               \
296     uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1;                               \
297     dst_u[0] = RGBToU(ar, ag, ab);                                             \
298     dst_v[0] = RGBToV(ar, ag, ab);                                             \
299   }                                                                            \
300 }
301 
302 MAKEROWY(ARGB, 2, 1, 0)
303 MAKEROWY(BGRA, 1, 2, 3)
304 MAKEROWY(ABGR, 0, 1, 2)
305 MAKEROWY(RGBA, 3, 2, 1)
306 
// Gray level of an RGB triple. See http://en.wikipedia.org/wiki/Grayscale.
// Approximates 0.11 * B + 0.59 * G + 0.30 * R with 8 bit fixed point weights.
// Coefficients rounded to multiple of 2 for consistency with SSSE3 version.
static __inline int RGBToGray(uint8 r, uint8 g, uint8 b) {
  const int weighted = 76 * r + 152 * g + 28 * b;
  return weighted >> 8;
}
313 
ARGBGrayRow_C(const uint8 * src_argb,uint8 * dst_argb,int width)314 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
315   for (int x = 0; x < width; ++x) {
316     uint8 y = RGBToGray(src_argb[2], src_argb[1], src_argb[0]);
317     dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
318     dst_argb[3] = src_argb[3];
319     dst_argb += 4;
320     src_argb += 4;
321   }
322 }
323 
// Applies a sepia tone to a row of ARGB pixels in place.
// Each output channel is a weighted sum of the input B, G, R in 7 bit fixed
// point. The blue weights sum to 120 (< 128) so blue cannot exceed 255;
// green and red are clamped to 255. Alpha is left untouched.
void ARGBSepiaRow_C(uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, dst_argb += 4) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int sepia_b = (blue * 17 + green * 68 + red * 35) >> 7;
    const int sepia_g = (blue * 22 + green * 88 + red * 45) >> 7;
    const int sepia_r = (blue * 24 + green * 98 + red * 50) >> 7;
    dst_argb[0] = sepia_b;
    dst_argb[1] = sepia_g > 255 ? 255 : sepia_g;
    dst_argb[2] = sepia_r > 255 ? 255 : sepia_r;
  }
}
346 
// Applies a signed colour matrix to a row of ARGB pixels in place.
// matrix_argb holds three rows of four coefficients (for output B, G, R in
// that order), each applied to the input B, G, R, A. Sums are scaled down by
// 7 bits and clamped to 0..255. Alpha is left untouched.
void ARGBColorMatrixRow_C(uint8* dst_argb, const int8* matrix_argb, int width) {
  for (int i = 0; i < width; ++i, dst_argb += 4) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int alpha = dst_argb[3];
    int out[3];
    // Compute all three channels before storing any of them.
    for (int c = 0; c < 3; ++c) {
      const int8* coeff = matrix_argb + c * 4;
      int sum = (blue * coeff[0] + green * coeff[1] +
                 red * coeff[2] + alpha * coeff[3]) >> 7;
      if (sum < 0) {
        sum = 0;
      }
      if (sum > 255) {
        sum = 255;
      }
      out[c] = sum;
    }
    dst_argb[0] = out[0];
    dst_argb[1] = out[1];
    dst_argb[2] = out[2];
  }
}
384 
// Remaps a row of ARGB pixels in place through a lookup table.
// table_argb is indexed as 4 interleaved tables: channel c (0 = B, 1 = G,
// 2 = R, 3 = A) with value v is replaced by table_argb[v * 4 + c].
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  for (int i = 0; i < width; ++i, dst_argb += 4) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int alpha = dst_argb[3];
    dst_argb[0] = table_argb[4 * blue];
    dst_argb[1] = table_argb[4 * green + 1];
    dst_argb[2] = table_argb[4 * red + 2];
    dst_argb[3] = table_argb[4 * alpha + 3];
  }
}
399 
// Quantizes the B, G and R channels of a row of ARGB pixels in place.
// Each channel v becomes ((v * scale) >> 16) * interval_size + interval_offset,
// where scale is a 16 bit fixed point multiplier. Alpha is left untouched.
void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
                       int interval_offset, int width) {
  for (int i = 0; i < width; ++i, dst_argb += 4) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int blue_step = blue * scale >> 16;
    const int green_step = green * scale >> 16;
    const int red_step = red * scale >> 16;
    dst_argb[0] = blue_step * interval_size + interval_offset;
    dst_argb[1] = green_step * interval_size + interval_offset;
    dst_argb[2] = red_step * interval_size + interval_offset;
  }
}
412 
// Expands a row of Y (gray) samples to ARGB: each Y is copied into blue,
// green and red, and alpha is set to 255.
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i, ++src_y, dst_argb += 4) {
    const uint8 luma = *src_y;
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = 255u;
  }
}
423 
424 // C reference code that mimics the YUV assembly.
425 
// YUV to RGB coefficients in 6 bit fixed point (scaled by 64).
#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */

#define UB 127 /* 2.018 * 64 is about 129; 127 (the int8 maximum) is used */
#define UG (-25) /* static_cast<int8>(-0.391 * 64 - 0.5) */
#define UR 0

#define VB 0
#define VG (-52) /* static_cast<int8>(-0.813 * 64 - 0.5) */
#define VR 102 /* static_cast<int8>(1.596 * 64 + 0.5) */

// Bias: the contribution of U = V = 128 to each channel, subtracted so that
// 128 acts as the zero point of U and V.
// Parenthesized so each macro expands to a single value inside any expression.
#define BB (UB * 128 + VB * 128)
#define BG (UG * 128 + VG * 128)
#define BR (UR * 128 + VR * 128)
440 
// Clamps val to the range 0..255 and returns it as an unsigned value.
static __inline uint32 Clip(int32 val) {
  const int32 clamped = val < 0 ? 0 : (val > 255 ? 255 : val);
  return static_cast<uint32>(clamped);
}
449 
YuvPixel(uint8 y,uint8 u,uint8 v,uint8 * rgb_buf,int ashift,int rshift,int gshift,int bshift)450 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
451                               int ashift, int rshift, int gshift, int bshift) {
452   int32 y1 = (static_cast<int32>(y) - 16) * YG;
453   uint32 b = Clip(static_cast<int32>((u * UB + v * VB) - (BB) + y1) >> 6);
454   uint32 g = Clip(static_cast<int32>((u * UG + v * VG) - (BG) + y1) >> 6);
455   uint32 r = Clip(static_cast<int32>((u * UR + v * VR) - (BR) + y1) >> 6);
456   *reinterpret_cast<uint32*>(rgb_buf) = (b << bshift) |
457                                         (g << gshift) |
458                                         (r << rshift) |
459                                         (255u << ashift);
460 }
461 
YuvPixel2(uint8 y,uint8 u,uint8 v,uint8 * b,uint8 * g,uint8 * r)462 static __inline void YuvPixel2(uint8 y, uint8 u, uint8 v,
463                                uint8* b, uint8* g, uint8* r) {
464   int32 y1 = (static_cast<int32>(y) - 16) * YG;
465   *b = Clip(static_cast<int32>((u * UB + v * VB) - (BB) + y1) >> 6);
466   *g = Clip(static_cast<int32>((u * UG + v * VG) - (BG) + y1) >> 6);
467   *r = Clip(static_cast<int32>((u * UR + v * VR) - (BR) + y1) >> 6);
468 }
469 
I444ToARGBRow_C(const uint8 * y_buf,const uint8 * u_buf,const uint8 * v_buf,uint8 * rgb_buf,int width)470 void I444ToARGBRow_C(const uint8* y_buf,
471                      const uint8* u_buf,
472                      const uint8* v_buf,
473                      uint8* rgb_buf,
474                      int width) {
475   for (int x = 0; x < width; ++x) {
476     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 24, 16, 8, 0);
477     y_buf += 1;
478     u_buf += 1;
479     v_buf += 1;
480     rgb_buf += 4;  // Advance 1 pixel.
481   }
482 }
483 
484 // Also used for 420
I422ToARGBRow_C(const uint8 * y_buf,const uint8 * u_buf,const uint8 * v_buf,uint8 * rgb_buf,int width)485 void I422ToARGBRow_C(const uint8* y_buf,
486                      const uint8* u_buf,
487                      const uint8* v_buf,
488                      uint8* rgb_buf,
489                      int width) {
490   for (int x = 0; x < width - 1; x += 2) {
491     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
492     YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
493     y_buf += 2;
494     u_buf += 1;
495     v_buf += 1;
496     rgb_buf += 8;  // Advance 2 pixels.
497   }
498   if (width & 1) {
499     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
500   }
501 }
502 
I422ToRGB24Row_C(const uint8 * y_buf,const uint8 * u_buf,const uint8 * v_buf,uint8 * rgb_buf,int width)503 void I422ToRGB24Row_C(const uint8* y_buf,
504                       const uint8* u_buf,
505                       const uint8* v_buf,
506                       uint8* rgb_buf,
507                       int width) {
508   for (int x = 0; x < width - 1; x += 2) {
509     YuvPixel2(y_buf[0], u_buf[0], v_buf[0],
510               rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
511     YuvPixel2(y_buf[1], u_buf[0], v_buf[0],
512               rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
513     y_buf += 2;
514     u_buf += 1;
515     v_buf += 1;
516     rgb_buf += 6;  // Advance 2 pixels.
517   }
518   if (width & 1) {
519     YuvPixel2(y_buf[0], u_buf[0], v_buf[0],
520               rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
521   }
522 }
523 
I422ToRAWRow_C(const uint8 * y_buf,const uint8 * u_buf,const uint8 * v_buf,uint8 * rgb_buf,int width)524 void I422ToRAWRow_C(const uint8* y_buf,
525                     const uint8* u_buf,
526                     const uint8* v_buf,
527                     uint8* rgb_buf,
528                     int width) {
529   for (int x = 0; x < width - 1; x += 2) {
530     YuvPixel2(y_buf[0], u_buf[0], v_buf[0],
531               rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
532     YuvPixel2(y_buf[1], u_buf[0], v_buf[0],
533               rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
534     y_buf += 2;
535     u_buf += 1;
536     v_buf += 1;
537     rgb_buf += 6;  // Advance 2 pixels.
538   }
539   if (width & 1) {
540     YuvPixel2(y_buf[0], u_buf[0], v_buf[0],
541               rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
542   }
543 }
544 
I411ToARGBRow_C(const uint8 * y_buf,const uint8 * u_buf,const uint8 * v_buf,uint8 * rgb_buf,int width)545 void I411ToARGBRow_C(const uint8* y_buf,
546                      const uint8* u_buf,
547                      const uint8* v_buf,
548                      uint8* rgb_buf,
549                      int width) {
550   for (int x = 0; x < width - 3; x += 4) {
551     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
552     YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
553     YuvPixel(y_buf[2], u_buf[0], v_buf[0], rgb_buf + 8, 24, 16, 8, 0);
554     YuvPixel(y_buf[3], u_buf[0], v_buf[0], rgb_buf + 12, 24, 16, 8, 0);
555     y_buf += 4;
556     u_buf += 1;
557     v_buf += 1;
558     rgb_buf += 16;  // Advance 4 pixels.
559   }
560   if (width & 2) {
561     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
562     YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
563     y_buf += 2;
564     rgb_buf += 8;  // Advance 2 pixels.
565   }
566   if (width & 1) {
567     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
568   }
569 }
570 
NV12ToARGBRow_C(const uint8 * y_buf,const uint8 * uv_buf,uint8 * rgb_buf,int width)571 void NV12ToARGBRow_C(const uint8* y_buf,
572                      const uint8* uv_buf,
573                      uint8* rgb_buf,
574                      int width) {
575   for (int x = 0; x < width - 1; x += 2) {
576     YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0);
577     YuvPixel(y_buf[1], uv_buf[0], uv_buf[1], rgb_buf + 4, 24, 16, 8, 0);
578     y_buf += 2;
579     uv_buf += 2;
580     rgb_buf += 8;  // Advance 2 pixels.
581   }
582   if (width & 1) {
583     YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0);
584   }
585 }
586 
NV21ToARGBRow_C(const uint8 * y_buf,const uint8 * vu_buf,uint8 * rgb_buf,int width)587 void NV21ToARGBRow_C(const uint8* y_buf,
588                      const uint8* vu_buf,
589                      uint8* rgb_buf,
590                      int width) {
591   for (int x = 0; x < width - 1; x += 2) {
592     YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0);
593     YuvPixel(y_buf[1], vu_buf[1], vu_buf[0], rgb_buf + 4, 24, 16, 8, 0);
594     y_buf += 2;
595     vu_buf += 2;
596     rgb_buf += 8;  // Advance 2 pixels.
597   }
598   if (width & 1) {
599     YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0);
600   }
601 }
602 
I422ToBGRARow_C(const uint8 * y_buf,const uint8 * u_buf,const uint8 * v_buf,uint8 * rgb_buf,int width)603 void I422ToBGRARow_C(const uint8* y_buf,
604                      const uint8* u_buf,
605                      const uint8* v_buf,
606                      uint8* rgb_buf,
607                      int width) {
608   for (int x = 0; x < width - 1; x += 2) {
609     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 8, 16, 24);
610     YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 8, 16, 24);
611     y_buf += 2;
612     u_buf += 1;
613     v_buf += 1;
614     rgb_buf += 8;  // Advance 2 pixels.
615   }
616   if (width & 1) {
617     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 0, 8, 16, 24);
618   }
619 }
620 
I422ToABGRRow_C(const uint8 * y_buf,const uint8 * u_buf,const uint8 * v_buf,uint8 * rgb_buf,int width)621 void I422ToABGRRow_C(const uint8* y_buf,
622                      const uint8* u_buf,
623                      const uint8* v_buf,
624                      uint8* rgb_buf,
625                      int width) {
626   for (int x = 0; x < width - 1; x += 2) {
627     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16);
628     YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 0, 8, 16);
629     y_buf += 2;
630     u_buf += 1;
631     v_buf += 1;
632     rgb_buf += 8;  // Advance 2 pixels.
633   }
634   if (width & 1) {
635     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16);
636   }
637 }
638 
I422ToRGBARow_C(const uint8 * y_buf,const uint8 * u_buf,const uint8 * v_buf,uint8 * rgb_buf,int width)639 void I422ToRGBARow_C(const uint8* y_buf,
640                      const uint8* u_buf,
641                      const uint8* v_buf,
642                      uint8* rgb_buf,
643                      int width) {
644   for (int x = 0; x < width - 1; x += 2) {
645     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8);
646     YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 24, 16, 8);
647     y_buf += 2;
648     u_buf += 1;
649     v_buf += 1;
650     rgb_buf += 8;  // Advance 2 pixels.
651   }
652   if (width & 1) {
653     YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8);
654   }
655 }
656 
YToARGBRow_C(const uint8 * y_buf,uint8 * rgb_buf,int width)657 void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) {
658   for (int x = 0; x < width; ++x) {
659     YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0);
660     y_buf += 1;
661     rgb_buf += 4;  // Advance 1 pixel.
662   }
663 }
664 
// Copies width bytes from src to dst in reverse order. The source is walked
// backwards from its last byte, two bytes per iteration, with a single byte
// handled at the end for an odd width.
void MirrorRow_C(const uint8* src, uint8* dst, int width) {
  const uint8* rev = src + (width - 1);
  int x = 0;
  while (x < width - 1) {
    dst[x] = rev[0];
    dst[x + 1] = rev[-1];
    rev -= 2;
    x += 2;
  }
  if (width & 1) {
    dst[width - 1] = *rev;
  }
}
676 
// Splits a row of interleaved U,V byte pairs into separate U and V rows while
// reversing the order of the pairs. width is the number of pairs.
void MirrorRowUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  const uint8* rev = src_uv + ((width - 1) << 1);  // Last U,V pair.
  int x = 0;
  while (x < width - 1) {
    dst_u[x] = rev[0];
    dst_u[x + 1] = rev[-2];
    dst_v[x] = rev[1];
    dst_v[x + 1] = rev[-1];
    rev -= 4;
    x += 2;
  }
  if (width & 1) {
    dst_u[width - 1] = rev[0];
    dst_v[width - 1] = rev[1];
  }
}
691 
// Copies width 4 byte pixels from src to dst in reverse pixel order. Pixels
// are moved as whole 32 bit values, so the bytes within a pixel keep their
// order.
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
  const uint32* in = reinterpret_cast<const uint32*>(src);
  uint32* out = reinterpret_cast<uint32*>(dst);
  in += width - 1;
  int x = 0;
  while (x < width - 1) {
    out[x] = in[0];
    out[x + 1] = in[-1];
    in -= 2;
    x += 2;
  }
  if (width & 1) {
    out[width - 1] = in[0];
  }
}
705 
// Splits a row of interleaved U,V byte pairs into separate U and V rows.
// width is the number of pairs.
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  int x = 0;
  while (x < width - 1) {
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[3];
    src_uv += 4;
    x += 2;
  }
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}
719 
// Copies count bytes from src to dst with memcpy.
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  const size_t bytes = count;
  memcpy(dst, src, bytes);
}
723 
// Fills count bytes at dst with the low 8 bits of v8.
void SetRow8_C(uint8* dst, uint32 v8, int count) {
#ifndef _MSC_VER
  memset(dst, v8, count);
#else
  // VC will generate rep stosb.
  for (int i = 0; i < count; ++i) {
    dst[i] = v8;
  }
#endif
}
734 
// Fills a rectangle with the 32 bit value v32: height rows of width 32 bit
// values each, where consecutive rows start dst_stride bytes apart.
void SetRows32_C(uint8* dst, uint32 v32, int width,
                 int dst_stride, int height) {
  uint8* row = dst;
  for (int rows_done = 0; rows_done < height; ++rows_done, row += dst_stride) {
    uint32* pixels = reinterpret_cast<uint32*>(row);
    for (int x = 0; x < width; ++x) {
      pixels[x] = v32;
    }
  }
}
745 
// Filter 2 rows of YUY2 UV's (422) into U and V (420).
// YUY2 stores each pixel pair as Y0, U, Y1, V. For every pair, U and V are
// the rounded average of this row and the row src_stride_yuy2 bytes further
// on. One U and one V are written per 2 pixels of width (rounded up).
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
                   uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_row = src_yuy2 + src_stride_yuy2;
  for (int remaining = width; remaining > 0; remaining -= 2) {
    *dst_u++ = (src_yuy2[1] + next_row[1] + 1) >> 1;
    *dst_v++ = (src_yuy2[3] + next_row[3] + 1) >> 1;
    src_yuy2 += 4;
    next_row += 4;
  }
}
758 
// Copy row of YUY2 UV's (422) into U and V (422).
// YUY2 stores each pixel pair as Y0, U, Y1, V; one U and one V are written
// per 2 pixels of width (rounded up).
void YUY2ToUV422Row_C(const uint8* src_yuy2,
                      uint8* dst_u, uint8* dst_v, int width) {
  for (int remaining = width; remaining > 0; remaining -= 2) {
    *dst_u++ = src_yuy2[1];
    *dst_v++ = src_yuy2[3];
    src_yuy2 += 4;
  }
}
771 
// Copy row of YUY2 Y's (422) into Y (420/422).
// YUY2 stores each pixel pair as Y0, U, Y1, V, so Y samples are the even
// bytes. One Y is written per pixel.
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
  int x = 0;
  while (x < width - 1) {
    dst_y[x] = src_yuy2[0];
    dst_y[x + 1] = src_yuy2[2];
    src_yuy2 += 4;
    x += 2;
  }
  if (width & 1) {
    dst_y[width - 1] = src_yuy2[0];
  }
}
784 
// Filter 2 rows of UYVY UV's (422) into U and V (420).
// UYVY stores each pixel pair as U, Y0, V, Y1. For every pair, U and V are
// the rounded average of this row and the row src_stride_uyvy bytes further
// on. One U and one V are written per 2 pixels of width (rounded up).
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
                   uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_row = src_uyvy + src_stride_uyvy;
  for (int remaining = width; remaining > 0; remaining -= 2) {
    *dst_u++ = (src_uyvy[0] + next_row[0] + 1) >> 1;
    *dst_v++ = (src_uyvy[2] + next_row[2] + 1) >> 1;
    src_uyvy += 4;
    next_row += 4;
  }
}
797 
// Copy row of UYVY UV's (422) into U and V (422).
// UYVY stores each pixel pair as U, Y0, V, Y1; one U and one V are written
// per 2 pixels of width (rounded up).
void UYVYToUV422Row_C(const uint8* src_uyvy,
                      uint8* dst_u, uint8* dst_v, int width) {
  for (int remaining = width; remaining > 0; remaining -= 2) {
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;
  }
}
810 
// Copy row of UYVY Y's (422) into Y (420/422).
// UYVY stores each pixel pair as U, Y0, V, Y1, so Y samples are the odd
// bytes. One Y is written per pixel.
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
  int x = 0;
  while (x < width - 1) {
    dst_y[x] = src_uyvy[1];
    dst_y[x + 1] = src_uyvy[3];
    src_uyvy += 4;
    x += 2;
  }
  if (width & 1) {
    dst_y[width - 1] = src_uyvy[1];
  }
}
823 
// Blend one foreground channel f over a background channel b, where a is the
// foreground alpha:
//   f + (((256 - a) * b) >> 8)
// The sum cannot exceed 255 as long as f <= a (foreground premultiplied by its
// alpha).  For any other input it is saturated to 255 instead of being left to
// wrap around when narrowed to 8 bits.
static inline uint8 BlendChannel(uint32 f, uint32 b, uint32 a) {
  const uint32 sum = (((256u - a) * b) >> 8) + f;
  return static_cast<uint8>(sum > 255u ? 255u : sum);
}

// Blend a single ARGB pixel.  Every input byte is read before any output byte
// is stored, so dst may point at either input pixel.
static inline void BlendPixel(const uint8* fg, const uint8* bg, uint8* dst) {
  const uint32 a = fg[3];
  const uint8 b = BlendChannel(fg[0], bg[0], a);
  const uint8 g = BlendChannel(fg[1], bg[1], a);
  const uint8 r = BlendChannel(fg[2], bg[2], a);
  dst[0] = b;
  dst[1] = g;
  dst[2] = r;
  dst[3] = 255u;  // Result is always opaque.
}

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
//   src_argb0 - foreground row; its alpha byte (offset 3) drives the blend.
//   src_argb1 - background row; its alpha byte is not read.
//   dst_argb  - output row; alpha is always written as 255.
//   width     - number of pixels; nothing is written when width <= 0.
// NOTE(review): the previous comment said this mirrors the SSSE3 row function
// for testability - confirm that version saturates its final add as well.
void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
                    uint8* dst_argb, int width) {
  for (int x = 0; x < width; ++x) {
    BlendPixel(src_argb0, src_argb1, dst_argb);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
// Scale channel f by alpha a.  Both 8-bit values are widened to 16 bits by
// repeating the byte (x | x << 8) and the top 8 bits of the 32-bit product
// are kept.
#define ATTENUATE(f, a) \
    ((((a) | ((a) << 8)) * ((f) | ((f) << 8))) >> 24)

// Multiply source RGB by alpha and store to destination.
// The alpha byte itself is copied through unchanged.
// This code mimics the SSSE3 version for better testability.
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  // Main loop consumes the row two pixels at a time.
  for (int remaining = width; remaining > 1; remaining -= 2) {
    for (int p = 0; p < 2; ++p) {
      const uint32 blue = src_argb[0];
      const uint32 green = src_argb[1];
      const uint32 red = src_argb[2];
      const uint32 alpha = src_argb[3];
      dst_argb[0] = ATTENUATE(blue, alpha);
      dst_argb[1] = ATTENUATE(green, alpha);
      dst_argb[2] = ATTENUATE(red, alpha);
      dst_argb[3] = alpha;
      src_argb += 4;
      dst_argb += 4;
    }
  }

  // Odd width: one pixel is left over.
  if (width & 1) {
    const uint32 blue = src_argb[0];
    const uint32 green = src_argb[1];
    const uint32 red = src_argb[2];
    const uint32 alpha = src_argb[3];
    dst_argb[0] = ATTENUATE(blue, alpha);
    dst_argb[1] = ATTENUATE(green, alpha);
    dst_argb[2] = ATTENUATE(red, alpha);
    dst_argb[3] = alpha;
  }
}
#undef ATTENUATE
913 
// Reciprocal table used to divide source RGB by alpha (unattenuate).
// The exact per-channel formula would be
//   c = (c * 255 + (a / 2)) / a;
// The reciprocal method is off by 1 on some values, e.g. 125.
// Entry a holds 0x10000 / a (8.16 fixed point) for a = 1..254.  Entries 0 and
// 255 are stored as 0x0100, i.e. a multiply by 0x0100 followed by >> 8 leaves
// the channel value unchanged.
#define T(a) (0x10000 / (a))
// Eight consecutive entries starting at index a.
#define T8(a) T((a) + 0), T((a) + 1), T((a) + 2), T((a) + 3), \
              T((a) + 4), T((a) + 5), T((a) + 6), T((a) + 7)
uint32 fixed_invtbl8[256] = {
  0x0100, T(0x01), T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
  T8(0x08),
  T8(0x10), T8(0x18),
  T8(0x20), T8(0x28),
  T8(0x30), T8(0x38),
  T8(0x40), T8(0x48),
  T8(0x50), T8(0x58),
  T8(0x60), T8(0x68),
  T8(0x70), T8(0x78),
  T8(0x80), T8(0x88),
  T8(0x90), T8(0x98),
  T8(0xa0), T8(0xa8),
  T8(0xb0), T8(0xb8),
  T8(0xc0), T8(0xc8),
  T8(0xd0), T8(0xd8),
  T8(0xe0), T8(0xe8),
  T8(0xf0),
  T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x0100 };
#undef T8
#undef T
955 
ARGBUnattenuateRow_C(const uint8 * src_argb,uint8 * dst_argb,int width)956 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
957   for (int i = 0; i < width; ++i) {
958     uint32 b = src_argb[0];
959     uint32 g = src_argb[1];
960     uint32 r = src_argb[2];
961     const uint32 a = src_argb[3];
962     if (a) {
963       const uint32 ia = fixed_invtbl8[a];  // 8.16 fixed point
964       b = (b * ia) >> 8;
965       g = (g * ia) >> 8;
966       r = (r * ia) >> 8;
967       // Clamping should not be necessary but is free in assembly.
968       if (b > 255) {
969         b = 255;
970       }
971       if (g > 255) {
972         g = 255;
973       }
974       if (r > 255) {
975         r = 255;
976       }
977     }
978     dst_argb[0] = b;
979     dst_argb[1] = g;
980     dst_argb[2] = r;
981     dst_argb[3] = a;
982     src_argb += 4;
983     dst_argb += 4;
984   }
985 }
986 
// Wrappers to handle widths that are not a multiple of 8 (planar YUV input).
// The SIMD function converts the first (width & ~7) pixels and the C function
// converts the remaining (width & 7) pixels.
//   NAMEANY       - name of the generated wrapper.
//   I420TORGB_SSE - SIMD row function, called with a multiple-of-8 width.
//   I420TORGB_C   - C row function used for the remainder.
//   UV_SHIFT      - log2 of the horizontal chroma subsampling, used to advance
//                   u_buf / v_buf past the SIMD part (0 = 444, 1 = 422,
//                   2 = 411 in the instantiations below).
//   BPP           - bytes per output pixel, used to advance rgb_buf past the
//                   SIMD part (4 for ARGB/BGRA/ABGR/RGBA, 3 for RGB24/RAW).
#define YANY(NAMEANY, I420TORGB_SSE, I420TORGB_C, UV_SHIFT, BPP)               \
    void NAMEANY(const uint8* y_buf,                                           \
                 const uint8* u_buf,                                           \
                 const uint8* v_buf,                                           \
                 uint8* rgb_buf,                                               \
                 int width) {                                                  \
      int n = width & ~7;                                                      \
      I420TORGB_SSE(y_buf, u_buf, v_buf, rgb_buf, n);                          \
      I420TORGB_C(y_buf + n,                                                   \
                  u_buf + (n >> UV_SHIFT),                                     \
                  v_buf + (n >> UV_SHIFT),                                     \
                  rgb_buf + n * BPP, width & 7);                               \
    }

// Wrappers to handle widths that are not a multiple of 8 (Y plane plus one
// combined UV plane).  Same split as YANY.  The output offset is fixed at
// 4 bytes per pixel; every instantiation below is a ...ToARGB function.
#define Y2NY(NAMEANY, NV12TORGB_SSE, NV12TORGB_C, UV_SHIFT)                    \
    void NAMEANY(const uint8* y_buf,                                           \
                 const uint8* uv_buf,                                          \
                 uint8* rgb_buf,                                               \
                 int width) {                                                  \
      int n = width & ~7;                                                      \
      NV12TORGB_SSE(y_buf, uv_buf, rgb_buf, n);                                \
      NV12TORGB_C(y_buf + n,                                                   \
                  uv_buf + (n >> UV_SHIFT),                                    \
                  rgb_buf + n * 4, width & 7);                                 \
    }


#ifdef HAS_I422TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
     0, 4)
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
     1, 4)
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
     2, 4)
Y2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C, 0)
Y2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C, 0)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
     1, 4)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
     1, 4)
#endif
#ifdef HAS_I422TORGB24ROW_SSSE3
// RGB24 and RAW are 3 bytes per pixel.
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_Unaligned_SSSE3,
     I422ToRGB24Row_C, 1, 3)
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_Unaligned_SSSE3, I422ToRAWRow_C,
     1, 3)
#endif
#ifdef HAS_I422TORGBAROW_SSSE3
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
     1, 4)
#endif
#ifdef HAS_I422TOARGBROW_NEON
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4)
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4)
Y2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0)
Y2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0)
// RGB24 and RAW are 3 bytes per pixel.
YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3)
YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3)
#endif
#undef YANY
#undef Y2NY
1044 
// Wrappers for ARGB -> packed RGB conversions of any width.
// The SIMD function converts into an aligned scratch row of kMaxStride bytes,
// then exactly width * BPP bytes are copied to the destination.
// NOTE(review): presumably the SIMD functions may store past width * BPP,
// which is why they do not write to rgb_buf directly - confirm.
//   NAMEANY  - name of the generated wrapper.
//   SIMD_ROW - SIMD row function to call.
//   BPP      - bytes per destination pixel.
#define RGBANY(NAMEANY, SIMD_ROW, BPP)                                         \
    void NAMEANY(const uint8* argb_buf,                                        \
                 uint8* rgb_buf,                                               \
                 int width) {                                                  \
      SIMD_ALIGNED(uint8 scratch[kMaxStride]);                                 \
      SIMD_ROW(argb_buf, scratch, width);                                      \
      memcpy(rgb_buf, scratch, width * BPP);                                   \
    }

#if defined(HAS_ARGBTORGB24ROW_SSSE3)
// 3 bytes per pixel.
RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 3)
RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 3)
// 2 bytes per pixel.
RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 2)
RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 2)
RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 2)
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 3)
RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 3)
#endif
#undef RGBANY
1066 
// Wrappers for "to Y" conversions of any width.
// The SIMD function is called twice: once for the first (width - 16) pixels
// and once for exactly the last 16 pixels of the row.
// NOTE(review): this looks like it requires width >= 16 and a SIMD function
// that tolerates a count that is not a multiple of its vector size - confirm
// against the callers.
//   NAMEANY  - name of the generated wrapper.
//   SIMD_ROW - SIMD row function to call.
//   BPP      - bytes per source pixel.
#define YANY(NAMEANY, SIMD_ROW, BPP)                                           \
    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) {             \
      const int head = width - 16;                                             \
      SIMD_ROW(src_argb, dst_y, head);                                         \
      SIMD_ROW(src_argb + head * BPP, dst_y + head, 16);                       \
    }

#ifdef HAS_ARGBTOYROW_SSSE3
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4)
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4)
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4)
#endif
#ifdef HAS_RGBATOYROW_SSSE3
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4)
#endif
#ifdef HAS_YUY2TOYROW_SSE2
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2)
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
#endif
#ifdef HAS_YUY2TOYROW_NEON
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2)
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2)
#endif
#undef YANY
1090 
// Wrappers for "to UV" conversions (with a source stride) of any width.
// The SIMD function handles the first (width & ~15) pixels and the C function
// the remaining (width & 15).  The U and V outputs advance by half the number
// of pixels handled by the SIMD part.
//   NAMEANY  - name of the generated wrapper.
//   SIMD_ROW - SIMD row function.
//   C_ROW    - C row function used for the remainder.
//   BPP      - bytes per source pixel.
#define UVANY(NAMEANY, SIMD_ROW, C_ROW, BPP)                                   \
    void NAMEANY(const uint8* src_argb, int src_stride_argb,                   \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
      const int simd_width = width & ~15;                                      \
      const int tail_width = width & 15;                                       \
      SIMD_ROW(src_argb, src_stride_argb, dst_u, dst_v, simd_width);           \
      C_ROW(src_argb + simd_width * BPP, src_stride_argb,                      \
            dst_u + (simd_width >> 1),                                         \
            dst_v + (simd_width >> 1),                                         \
            tail_width);                                                       \
    }

#ifdef HAS_ARGBTOUVROW_SSSE3
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4)
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4)
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4)
#endif
#ifdef HAS_RGBATOYROW_SSSE3
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4)
#endif
#ifdef HAS_YUY2TOUVROW_SSE2
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2)
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
#endif
#ifdef HAS_YUY2TOUVROW_NEON
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2)
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
#endif
#undef UVANY
1119 
// Wrappers for single-row "to UV422" conversions of any width.
// The SIMD function handles the first (width & ~15) pixels and the C function
// the remaining (width & 15).  The U and V outputs advance by half the number
// of pixels handled by the SIMD part.
//   NAMEANY  - name of the generated wrapper.
//   SIMD_ROW - SIMD row function.
//   C_ROW    - C row function used for the remainder.
//   BPP      - bytes per source pixel.
#define UV422ANY(NAMEANY, SIMD_ROW, C_ROW, BPP)                                \
    void NAMEANY(const uint8* src_argb,                                        \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
      const int simd_width = width & ~15;                                      \
      const int tail_width = width & 15;                                       \
      SIMD_ROW(src_argb, dst_u, dst_v, simd_width);                            \
      C_ROW(src_argb + simd_width * BPP,                                       \
            dst_u + (simd_width >> 1),                                         \
            dst_v + (simd_width >> 1),                                         \
            tail_width);                                                       \
    }

#ifdef HAS_YUY2TOUV422ROW_SSE2
UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
         YUY2ToUV422Row_C, 2)
UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2,
         UYVYToUV422Row_C, 2)
#endif
#ifdef HAS_YUY2TOUV422ROW_NEON
UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON,
         YUY2ToUV422Row_C, 2)
UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
         UYVYToUV422Row_C, 2)
#endif
#undef UV422ANY
1144 
// Build one row of a 4-channel cumulative sum table.
// For every pixel x and channel c:
//   cumsum[x * 4 + c] = (sum of row[0..x] for channel c) +
//                       previous_cumsum[x * 4 + c]
//   row             - width pixels of 4 bytes each.
//   cumsum          - output, 4 int32 per pixel.
//   previous_cumsum - 4 int32 per pixel, added to the running row sums.
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
                               const int32* previous_cumsum, int width) {
  int32 running[4] = {0, 0, 0, 0};
  for (int x = 0; x < width; ++x) {
    const int base = x * 4;
    // Fold this pixel into the running per-channel sums.
    for (int c = 0; c < 4; ++c) {
      running[c] += row[base + c];
    }
    for (int c = 0; c < 4; ++c) {
      cumsum[base + c] = running[c]  + previous_cumsum[base + c];
    }
  }
}
1159 
// Convert cumulative sums into box averages for count output pixels.
// For each channel c the box sum is
//   bl[w + c] + tl[c] - bl[c] - tl[w + c]
// which is scaled by 1 / area and truncated to 8 bits.
//   tl, bl - two rows of 4-channel int32 cumulative sums.
//   w      - offset, in int32 elements, between the two columns sampled.
//   area   - divisor applied to every box sum.
//   dst    - output, 4 bytes per pixel.
//   count  - number of output pixels.
void CumulativeSumToAverage_C(const int32* tl, const int32* bl,
                              int w, int area, uint8* dst, int count) {
  const float inv_area = 1.0f / area;
  for (int i = 0; i < count; ++i, dst += 4, tl += 4, bl += 4) {
    for (int c = 0; c < 4; ++c) {
      const int32 box_sum = bl[w + c] + tl[c] - bl[c] - tl[w + c];
      dst[c] = static_cast<uint8>(box_sum * inv_area);
    }
  }
}
1173 
// Scale every channel of an ARGB row by the matching byte of value.
// Byte c of value (bits 8c..8c+7) scales byte c of each pixel.  Both the
// channel and its scale are widened to 16 bits by repeating the byte, and the
// top 8 bits of their 32-bit product are stored.
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
                    uint32 value) {
  uint32 scale[4];
  for (int c = 0; c < 4; ++c) {
    const uint32 s = (value >> (8 * c)) & 0xff;
    scale[c] = s | (s << 8);
  }

  for (int i = 0; i < width; ++i, src_argb += 4, dst_argb += 4) {
    // Read the whole pixel before storing anything.
    uint32 channel[4];
    for (int c = 0; c < 4; ++c) {
      const uint32 v = src_argb[c];
      channel[c] = v | (v << 8);
    }
    for (int c = 0; c < 4; ++c) {
      dst_argb[c] = (scale[c] * channel[c]) >> 24;
    }
  }
}
1199 
// Copy pixels from rotated source to destination row with a slope.
//   src_argb        - source image; pixel (x, y) is read from
//                     src_argb + y * src_argb_stride + x * 4.
//   src_argb_stride - bytes between source rows.
//   dst_argb        - destination row, 4 bytes per pixel.
//   uv_dudv         - four floats: starting u, starting v, and the per-pixel
//                     increments of u and v.
//   width           - number of destination pixels to write.
// u and v are truncated to integers to select the source pixel.  No bounds
// checking is done on the resulting coordinates.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
                     uint8* dst_argb, const float* uv_dudv, int width) {
  float u = uv_dudv[0];
  float v = uv_dudv[1];
  const float du = uv_dudv[2];
  const float dv = uv_dudv[3];
  for (int i = 0; i < width; ++i) {
    const int x = static_cast<int>(u);
    const int y = static_cast<int>(v);
    const uint8* src_pixel = src_argb + y * src_argb_stride + x * 4;
    // Move the 4 pixel bytes through memcpy: the byte pointers carry no
    // alignment guarantee, and dereferencing them as uint32 would also break
    // the aliasing rules.
    uint32 pixel;
    memcpy(&pixel, src_pixel, 4);
    memcpy(dst_argb, &pixel, 4);
    dst_argb += 4;
    u += du;
    v += dv;
  }
}
1219 
// Blend two vertically adjacent ARGB rows into one row (C version).
//   dst_ptr           - output row, dst_width * 4 bytes.
//   src_ptr           - first source row; the second row starts src_stride
//                       bytes after it.
//   dst_width         - number of pixels to produce; nothing is written when
//                       it is <= 0.
//   source_y_fraction - weight of the second row out of 256; the first row
//                       gets 256 - source_y_fraction.
// Exactly dst_width * 4 bytes are read from each row and written to dst_ptr,
// so odd widths do not run one pixel past the end of the buffers.
void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
                          ptrdiff_t src_stride,
                          int dst_width, int source_y_fraction) {
  const int y1_fraction = source_y_fraction;
  const int y0_fraction = 256 - y1_fraction;
  const int num_bytes = dst_width * 4;
  if (y1_fraction == 0) {
    // The second row has zero weight; copy without touching it, since it may
    // not exist (e.g. when src_ptr is the last row of an image).
    for (int i = 0; i < num_bytes; ++i) {
      dst_ptr[i] = src_ptr[i];
    }
    return;
  }
  const uint8* src_ptr1 = src_ptr + src_stride;
  for (int i = 0; i < num_bytes; ++i) {
    dst_ptr[i] = (src_ptr[i] * y0_fraction + src_ptr1[i] * y1_fraction) >> 8;
  }
}
1242 
1243 #ifdef __cplusplus
1244 }  // extern "C"
1245 }  // namespace libyuv
1246 #endif
1247